From 75ceb6f66f411acb0edbf5b5fcc41ae616ad3c47 Mon Sep 17 00:00:00 2001 From: vic Date: Fri, 27 Feb 2026 18:10:46 +0100 Subject: [PATCH 1/5] IVF-SQ --- cpp/CMakeLists.txt | 6 + cpp/bench/ann/CMakeLists.txt | 11 +- .../src/cuvs/cuvs_ann_bench_param_parser.h | 25 + cpp/bench/ann/src/cuvs/cuvs_benchmark.cu | 20 +- cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu | 10 + cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h | 141 ++++ cpp/include/cuvs/neighbors/ivf_sq.hpp | 336 +++++++++ cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh | 664 ++++++++++++++++++ ...f_sq_build_extend_float_uint8_t_int64_t.cu | 89 +++ ...vf_sq_build_extend_half_uint8_t_int64_t.cu | 89 +++ cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh | 549 +++++++++++++++ .../ivf_sq_search_float_uint8_t_int64_t.cu | 29 + .../ivf_sq_search_half_uint8_t_int64_t.cu | 29 + cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh | 161 +++++ .../ivf_sq/ivf_sq_serialize_uint8_t.cu | 16 + cpp/src/neighbors/ivf_sq_index.cpp | 236 +++++++ cpp/tests/CMakeLists.txt | 7 + cpp/tests/neighbors/ann_ivf_sq.cuh | 457 ++++++++++++ .../ann_ivf_sq/test_float_uint8_t.cu | 21 + .../cuvs_bench/config/algorithms.yaml | 3 + .../cuvs_bench/config/algos/cuvs_ivf_sq.yaml | 16 + 21 files changed, 2913 insertions(+), 2 deletions(-) create mode 100644 cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu create mode 100644 cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h create mode 100644 cpp/include/cuvs/neighbors/ivf_sq.hpp create mode 100644 cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh create mode 100644 cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu create mode 100644 cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu create mode 100644 cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh create mode 100644 cpp/src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu create mode 100644 cpp/src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu create mode 100644 cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh create mode 100644 cpp/src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu create mode 100644 cpp/src/neighbors/ivf_sq_index.cpp create mode 100644 cpp/tests/neighbors/ann_ivf_sq.cuh create mode 100644 cpp/tests/neighbors/ann_ivf_sq/test_float_uint8_t.cu create mode 100644 python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d90579812a..610b9eff3f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -621,6 +621,12 @@ if(NOT BUILD_CPU_ONLY) src/neighbors/ivf_pq/detail/ivf_pq_transform_half_int64_t.cu src/neighbors/ivf_pq/detail/ivf_pq_transform_int8_t_int64_t.cu src/neighbors/ivf_pq/detail/ivf_pq_transform_uint8_t_int64_t.cu + src/neighbors/ivf_sq_index.cpp + src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu + src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu + src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu + src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu + src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu src/neighbors/knn_merge_parts.cu src/neighbors/nn_descent.cu src/neighbors/nn_descent_float.cu diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 8d254c0933..ae42abeb35 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -24,6 +24,7 @@ option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algori option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_FAISS_CPU_HNSW_FLAT "Include faiss' hnsw algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT "Include cuVS ivf flat algorithm in benchmark" ON) +option(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ "Include cuVS ivf sq algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ "Include cuVS ivf pq algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_CAGRA "Include cuVS CAGRA in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE "Include cuVS brute force knn in benchmark" ON) @@ -80,6 +81,7 @@ set(CUVS_USE_FAISS_STATIC ON) if(BUILD_CPU_ONLY) set(CUVS_FAISS_ENABLE_GPU OFF) set(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT OFF) + set(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ OFF) set(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OFF) set(CUVS_ANN_BENCH_USE_CUVS_CAGRA OFF) set(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE OFF) @@ -97,6 +99,7 @@ set(CUVS_ANN_BENCH_USE_CUVS OFF) if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OR CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE OR CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT + OR CUVS_ANN_BENCH_USE_CUVS_IVF_SQ OR CUVS_ANN_BENCH_USE_CUVS_CAGRA OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OR CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE @@ -242,6 +245,12 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT) ) endif() +if(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ) + ConfigureAnnBench( + NAME CUVS_IVF_SQ PATH src/cuvs/cuvs_benchmark.cu src/cuvs/cuvs_ivf_sq.cu LINKS cuvs + ) +endif() + if(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE) ConfigureAnnBench(NAME CUVS_BRUTE_FORCE PATH src/cuvs/cuvs_benchmark.cu LINKS cuvs) endif() diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h index faa3345d1f..4bc7505dc4 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h @@ -35,6 +35,11 @@ extern template class cuvs::bench::cuvs_cagra; extern template class cuvs::bench::cuvs_cagra; #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ +#include "cuvs_ivf_sq_wrapper.h" +extern template class cuvs::bench::cuvs_ivf_sq; +extern template class cuvs::bench::cuvs_ivf_sq; +#endif #ifdef CUVS_ANN_BENCH_USE_CUVS_MG #include "cuvs_ivf_flat_wrapper.h" #include "cuvs_mg_ivf_flat_wrapper.h" @@ -86,6 +91,26 @@ void parse_search_param(const nlohmann::json& conf, } #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ +template +void parse_build_param(const nlohmann::json& conf, + typename cuvs::bench::cuvs_ivf_sq::build_param& param) +{ + param.n_lists = conf.at("nlist"); + if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); } + if (conf.contains("ratio")) { + param.kmeans_trainset_fraction = 1.0 / static_cast(conf.at("ratio")); + } +} + +template +void parse_search_param(const nlohmann::json& conf, + typename cuvs::bench::cuvs_ivf_sq::search_param& param) +{ + param.ivf_sq_params.n_probes = conf.at("nprobe"); +} +#endif + #if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || \ defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) || \ defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_DISKANN) diff --git a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu index aebac654c2..22aeb31c38 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -84,6 +84,15 @@ auto create_algo(const std::string& algo_name, } } #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ + if constexpr (std::is_same_v || std::is_same_v) { + if (algo_name == "cuvs_ivf_sq") { + typename cuvs::bench::cuvs_ivf_sq::build_param param; + parse_build_param(conf, param); + a = std::make_unique>(metric, dim, param); + } + } +#endif #ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_PQ if (algo_name == "raft_ivf_pq" || algo_name == "cuvs_ivf_pq") { typename cuvs::bench::cuvs_ivf_pq::build_param param; @@ -151,6 +160,15 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con } } #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ + if constexpr (std::is_same_v || std::is_same_v) { + if (algo_name == "cuvs_ivf_sq") { + auto param = std::make_unique::search_param>(); + parse_search_param(conf, *param); + return param; + } + } +#endif #ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_PQ if (algo_name == "raft_ivf_pq" || algo_name == "cuvs_ivf_pq") { auto param = std::make_unique::search_param>(); diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu new file mode 100644 index 0000000000..ec41324c8d --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu @@ -0,0 +1,10 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#include "cuvs_ivf_sq_wrapper.h" + +namespace cuvs::bench { +template class cuvs_ivf_sq; +template class cuvs_ivf_sq; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h new file mode 100644 index 0000000000..1503e6bb84 --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h @@ -0,0 +1,141 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include "../common/ann_types.hpp" +#include "cuvs_ann_bench_utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cuvs::bench { + +template +class cuvs_ivf_sq : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + + struct search_param : public search_param_base { + cuvs::neighbors::ivf_sq::search_params ivf_sq_params; + }; + + using build_param = cuvs::neighbors::ivf_sq::index_params; + + cuvs_ivf_sq(Metric metric, int dim, const build_param& param) + : algo(metric, dim), index_params_(param), dimension_(dim) + { + index_params_.metric = parse_metric_type(metric); + index_params_.conservative_memory_allocation = true; + RAFT_CUDA_TRY(cudaGetDevice(&device_)); + } + + void build(const T* dataset, size_t nrow) final; + + void set_search_param(const search_param_base& param, const void* filter_bitset) override; + + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + return handle_.get_sync_stream(); + } + + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHostMmap; + property.query_memory_type = MemoryType::kDevice; + return property; + } + + void save(const std::string& file) const override; + void load(const std::string&) override; + std::unique_ptr> copy() override; + + private: + configured_raft_resources handle_{}; + build_param index_params_; + cuvs::neighbors::ivf_sq::search_params search_params_; + std::shared_ptr> index_; + int device_; + int dimension_; + + std::shared_ptr filter_; +}; + +template +void cuvs_ivf_sq::build(const T* dataset, size_t nrow) +{ + size_t n_streams = 1; + raft::resource::set_cuda_stream_pool(handle_, std::make_shared(n_streams)); + index_ = std::make_shared>( + std::move(cuvs::neighbors::ivf_sq::build( + handle_, + index_params_, + raft::make_host_matrix_view(dataset, nrow, dimension_)))); +} + +template +void cuvs_ivf_sq::set_search_param(const search_param_base& param, const void* filter_bitset) +{ + filter_ = make_cuvs_filter(filter_bitset, index_->size()); + auto sp = dynamic_cast(param); + search_params_ = sp.ivf_sq_params; + assert(search_params_.n_probes <= index_params_.n_lists); +} + +template +void cuvs_ivf_sq::save(const std::string& file) const +{ + cuvs::neighbors::ivf_sq::serialize(handle_, file, *index_); +} + +template +void cuvs_ivf_sq::load(const std::string& file) +{ + index_ = + std::make_shared>(handle_, index_params_, this->dim_); + cuvs::neighbors::ivf_sq::deserialize(handle_, file, index_.get()); +} + +template +std::unique_ptr> cuvs_ivf_sq::copy() +{ + return std::make_unique>(*this); +} + +template +void cuvs_ivf_sq::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + static_assert(sizeof(algo_base::index_type) == sizeof(int64_t)); + + cuvs::neighbors::ivf_sq::search( + handle_, + search_params_, + *index_, + raft::make_device_matrix_view(queries, batch_size, index_->dim()), + raft::make_device_matrix_view( + reinterpret_cast(neighbors), batch_size, k), + raft::make_device_matrix_view(distances, batch_size, k), + *filter_); +} + +} // namespace cuvs::bench diff --git a/cpp/include/cuvs/neighbors/ivf_sq.hpp b/cpp/include/cuvs/neighbors/ivf_sq.hpp new file mode 100644 index 0000000000..2f09751e95 --- /dev/null +++ b/cpp/include/cuvs/neighbors/ivf_sq.hpp @@ -0,0 +1,336 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "common.hpp" +#include +#include +#include +#include +#include +#include +#include + +namespace cuvs::neighbors::ivf_sq { + +/** + * @defgroup ivf_sq_cpp_index_params IVF-SQ index build parameters + * @{ + */ + +constexpr static uint32_t kIndexGroupSize = 32; + +struct index_params : cuvs::neighbors::index_params { + uint32_t n_lists = 1024; + uint32_t kmeans_n_iters = 20; + double kmeans_trainset_fraction = 0.5; + bool adaptive_centers = false; + bool conservative_memory_allocation = false; + bool add_data_on_build = true; +}; + +struct search_params : cuvs::neighbors::search_params { + uint32_t n_probes = 20; +}; + +static_assert(std::is_aggregate_v); +static_assert(std::is_aggregate_v); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_list_spec IVF-SQ list storage spec + * @{ + */ + +template +struct list_spec { + static_assert(std::is_same_v, "IVF-SQ code type IdxT must be uint8_t"); + + using value_type = IdxT; + using list_extents = raft::matrix_extent; + using index_type = ExtT; + + SizeT align_max; + SizeT align_min; + uint32_t dim; + + constexpr list_spec(uint32_t dim, bool conservative_memory_allocation) + : dim(dim), + align_min(kIndexGroupSize), + align_max(conservative_memory_allocation ? kIndexGroupSize : 1024) + { + } + + template + constexpr explicit list_spec(const list_spec& other_spec) + : dim{other_spec.dim}, align_min{other_spec.align_min}, align_max{other_spec.align_max} + { + } + + static constexpr uint32_t kVecLen = 16; + + constexpr auto make_list_extents(SizeT n_rows) const -> list_extents + { + uint32_t padded = ((dim + kVecLen - 1) / kVecLen) * kVecLen; + return raft::make_extents(n_rows, padded); + } +}; + +template +using list_data = ivf::list; + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_index IVF-SQ index + * @{ + */ + +/** + * @brief IVF-SQ index. + * + * @tparam IdxT SQ code type. Only uint8_t (8-bit, codes in [0,255]) for now. + * + * No member depends on the raw data type T (float, half). T appears only + * in the free-function signatures (build, search, extend) where input data + * is consumed, following the IVF-PQ pattern. + */ +template +struct index : cuvs::neighbors::index { + static_assert(std::is_same_v, "IVF-SQ code type IdxT must be uint8_t for now."); + + using index_params_type = ivf_sq::index_params; + using search_params_type = ivf_sq::search_params; + using code_type = IdxT; + + static constexpr uint32_t sq_bits = sizeof(IdxT) * 8; + + public: + index(const index&) = delete; + index(index&&) = default; + index& operator=(const index&) = delete; + index& operator=(index&&) = default; + ~index() = default; + + index(raft::resources const& res); + index(raft::resources const& res, const index_params& params, uint32_t dim); + index(raft::resources const& res, + cuvs::distance::DistanceType metric, + uint32_t n_lists, + uint32_t dim, + bool adaptive_centers, + bool conservative_memory_allocation); + + cuvs::distance::DistanceType metric() const noexcept; + bool adaptive_centers() const noexcept; + int64_t size() const noexcept; + uint32_t dim() const noexcept; + uint32_t n_lists() const noexcept; + bool conservative_memory_allocation() const noexcept; + + raft::device_vector_view list_sizes() noexcept; + raft::device_vector_view list_sizes() const noexcept; + + raft::device_matrix_view centers() noexcept; + raft::device_matrix_view centers() const noexcept; + + std::optional> center_norms() noexcept; + std::optional> center_norms() const noexcept; + void allocate_center_norms(raft::resources const& res); + + raft::device_vector_view sq_vmin() noexcept; + raft::device_vector_view sq_vmin() const noexcept; + + raft::device_vector_view sq_delta() noexcept; + raft::device_vector_view sq_delta() const noexcept; + + raft::host_vector_view accum_sorted_sizes() noexcept; + [[nodiscard]] raft::host_vector_view accum_sorted_sizes() const noexcept; + + raft::device_vector_view data_ptrs() noexcept; + raft::device_vector_view data_ptrs() const noexcept; + + raft::device_vector_view inds_ptrs() noexcept; + raft::device_vector_view inds_ptrs() const noexcept; + + std::vector>>& lists() noexcept; + const std::vector>>& lists() const noexcept; + + void check_consistency(); + + private: + cuvs::distance::DistanceType metric_; + bool adaptive_centers_; + bool conservative_memory_allocation_; + + std::vector>> lists_; + raft::device_vector list_sizes_; + raft::device_matrix centers_; + std::optional> center_norms_; + raft::device_vector sq_vmin_; + raft::device_vector sq_delta_; + + raft::device_vector data_ptrs_; + raft::device_vector inds_ptrs_; + raft::host_vector accum_sorted_sizes_; +}; + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_index_build IVF-SQ index build + * @{ + */ + +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::ivf_sq::index; + +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::device_matrix_view dataset, + cuvs::neighbors::ivf_sq::index& idx); + +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::ivf_sq::index; + +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::device_matrix_view dataset, + cuvs::neighbors::ivf_sq::index& idx); + +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::ivf_sq::index; + +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::host_matrix_view dataset, + cuvs::neighbors::ivf_sq::index& idx); + +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::ivf_sq::index; + +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::host_matrix_view dataset, + cuvs::neighbors::ivf_sq::index& idx); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_index_extend IVF-SQ index extend + * @{ + */ + +auto extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_sq::index& orig_index) + -> cuvs::neighbors::ivf_sq::index; + +void extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_sq::index* idx); + +auto extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_sq::index& orig_index) + -> cuvs::neighbors::ivf_sq::index; + +void extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_sq::index* idx); + +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_sq::index& orig_index) + -> cuvs::neighbors::ivf_sq::index; + +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_sq::index* idx); + +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_sq::index& orig_index) + -> cuvs::neighbors::ivf_sq::index; + +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_sq::index* idx); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_index_search IVF-SQ index search + * @{ + */ + +void search(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::search_params& params, + const cuvs::neighbors::ivf_sq::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::search_params& params, + const cuvs::neighbors::ivf_sq::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_index_serialize IVF-SQ index serialize + * @{ + */ + +void serialize(raft::resources const& handle, + const std::string& filename, + const cuvs::neighbors::ivf_sq::index& index); + +void deserialize(raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::ivf_sq::index* index); + +/** + * @} + */ + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh b/cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh new file mode 100644 index 0000000000..6c46a20e65 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh @@ -0,0 +1,664 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../../core/nvtx.hpp" +#include "../ivf_common.cuh" +#include "../ivf_list.cuh" + +#include +#include +#include + +#include "../../cluster/kmeans_balanced.cuh" +#include "../detail/ann_utils.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +namespace cuvs::neighbors::ivf_sq { +using namespace cuvs::spatial::knn::detail; // NOLINT + +namespace detail { + +struct ColMinMaxPair { + float min_val; + float max_val; +}; + +struct ColMinMaxOp { + __device__ __forceinline__ ColMinMaxPair operator()(const ColMinMaxPair& a, + const ColMinMaxPair& b) const + { + return {fminf(a.min_val, b.min_val), fmaxf(a.max_val, b.max_val)}; + } +}; + +/** + * Fused per-column min+max in a single pass (2x less DRAM traffic than two + * separate reductions). One thread block per column; threads stride over + * rows and feed CUB BlockReduce with a combined min/max pair. + * + * Row-loop is manually 4x-unrolled so the compiler can overlap four + * independent __ldg requests in the memory pipeline. + */ +template +__launch_bounds__(BlockSize) RAFT_KERNEL fused_column_minmax_kernel(const float* __restrict__ data, + float* __restrict__ col_min, + float* __restrict__ col_max, + int64_t n_rows, + uint32_t dim) +{ + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + + const uint32_t col = blockIdx.x; + if (col >= dim) return; + + ColMinMaxPair agg = {std::numeric_limits::max(), std::numeric_limits::lowest()}; + + const int64_t stride = static_cast(BlockSize); + int64_t row = static_cast(threadIdx.x); + + for (; row + 3 * stride < n_rows; row += 4 * stride) { + float v0 = __ldg(&data[row * dim + col]); + float v1 = __ldg(&data[(row + stride) * dim + col]); + float v2 = __ldg(&data[(row + 2 * stride) * dim + col]); + float v3 = __ldg(&data[(row + 3 * stride) * dim + col]); + agg.min_val = fminf(agg.min_val, fminf(fminf(v0, v1), fminf(v2, v3))); + agg.max_val = fmaxf(agg.max_val, fmaxf(fmaxf(v0, v1), fmaxf(v2, v3))); + } + for (; row < n_rows; row += stride) { + float val = __ldg(&data[row * dim + col]); + agg.min_val = fminf(agg.min_val, val); + agg.max_val = fmaxf(agg.max_val, val); + } + + agg = BlockReduce(temp_storage).Reduce(agg, ColMinMaxOp()); + + if (threadIdx.x == 0) { + col_min[col] = agg.min_val; + col_max[col] = agg.max_val; + } +} + +template +auto clone(const raft::resources& res, const index& source) -> index +{ + auto stream = raft::resource::get_cuda_stream(res); + + index target(res, + source.metric(), + source.n_lists(), + source.dim(), + source.adaptive_centers(), + source.conservative_memory_allocation()); + + raft::copy(target.list_sizes().data_handle(), + source.list_sizes().data_handle(), + source.list_sizes().size(), + stream); + raft::copy(target.centers().data_handle(), + source.centers().data_handle(), + source.centers().size(), + stream); + if (source.center_norms().has_value()) { + target.allocate_center_norms(res); + raft::copy(target.center_norms()->data_handle(), + source.center_norms()->data_handle(), + source.center_norms()->size(), + stream); + } + raft::copy(target.sq_vmin().data_handle(), + source.sq_vmin().data_handle(), + source.sq_vmin().size(), + stream); + raft::copy(target.sq_delta().data_handle(), + source.sq_delta().data_handle(), + source.sq_delta().size(), + stream); + target.lists() = source.lists(); + ivf::detail::recompute_internal_state(res, target); + return target; +} + +/** + * Kernel to encode float residuals to uint8_t SQ codes and write them interleaved. + * + * Uses warp-per-vector parallelism: each warp cooperatively encodes one vector + * so that reads from residuals/vmin/delta are coalesced across the 32 lanes. + * Lane 0 handles the atomic position assignment and the index write. + */ +template +__launch_bounds__(BlockSize) RAFT_KERNEL encode_and_fill_kernel(const uint32_t* labels, + const float* residuals, + const int64_t* source_ixs, + uint8_t** list_data_ptrs, + int64_t** list_index_ptrs, + uint32_t* list_sizes_ptr, + const float* vmin, + const float* delta, + int64_t n_rows, + uint32_t dim, + int64_t batch_offset) +{ + constexpr uint32_t kWarpSize = kIndexGroupSize; + constexpr uint32_t kWarpsPerBlock = BlockSize / kWarpSize; + + const uint32_t lane_id = threadIdx.x % kWarpSize; + const int64_t row_id = + int64_t(threadIdx.x / kWarpSize) + int64_t(blockIdx.x) * int64_t(kWarpsPerBlock); + if (row_id >= n_rows) return; + + uint32_t list_id = 0; + uint32_t inlist_id = 0; + if (lane_id == 0) { + auto source_ix = source_ixs == nullptr ? row_id + batch_offset : source_ixs[row_id]; + list_id = labels[row_id]; + inlist_id = atomicAdd(list_sizes_ptr + list_id, 1); + list_index_ptrs[list_id][inlist_id] = source_ix; + } + list_id = __shfl_sync(0xFFFFFFFF, list_id, 0); + inlist_id = __shfl_sync(0xFFFFFFFF, inlist_id, 0); + + using interleaved_group = raft::Pow2; + auto group_offset = interleaved_group::roundDown(inlist_id); + auto ingroup_id = interleaved_group::mod(inlist_id); + + constexpr uint32_t veclen = list_spec::kVecLen; + uint32_t padded_dim = ((dim + veclen - 1) / veclen) * veclen; + auto* list_dat = list_data_ptrs[list_id] + static_cast(group_offset) * padded_dim; + const float* src = residuals + row_id * dim; + + for (uint32_t d = lane_id; d < padded_dim; d += kWarpSize) { + uint8_t out; + if (d < dim) { + float val = src[d]; + float dv = delta[d]; + float v = vmin[d]; + float code = (dv > 0.0f) ? roundf((val - v) / dv) : 0.0f; + out = static_cast(fminf(fmaxf(code, 0.0f), 255.0f)); + } else { + out = 0; + } + uint32_t l = (d / veclen) * veclen; + uint32_t j = d % veclen; + list_dat[l * kIndexGroupSize + ingroup_id * veclen + j] = out; + } +} + +/** + * Compute residuals: residual[i] = cast(x_i) - centers[labels[i]] + */ +template +RAFT_KERNEL compute_residuals_kernel(const T* dataset, + const float* centers, + const uint32_t* labels, + float* residuals, + int64_t n_rows, + uint32_t dim) +{ + int64_t i = int64_t(blockIdx.x) * blockDim.x + threadIdx.x; + uint32_t j = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= n_rows || j >= dim) return; + + float val = utils::mapping{}(dataset[i * dim + j]); + uint32_t c = labels[i]; + residuals[i * dim + j] = val - centers[c * dim + j]; +} + +template +void extend(raft::resources const& handle, + index* index, + const T* new_vectors, + const int64_t* new_indices, + int64_t n_rows) +{ + using LabelT = uint32_t; + RAFT_EXPECTS(index != nullptr, "index cannot be empty."); + if (n_rows == 0) return; + + auto stream = raft::resource::get_cuda_stream(handle); + auto n_lists = index->n_lists(); + auto dim = index->dim(); + list_spec list_device_spec{index->dim(), + index->conservative_memory_allocation()}; + cuvs::common::nvtx::range fun_scope( + "ivf_sq::extend(%zu, %u)", size_t(n_rows), dim); + + RAFT_EXPECTS(new_indices != nullptr || index->size() == 0, + "You must pass data indices when the index is non-empty."); + + auto new_labels = + raft::make_device_mdarray(handle, + raft::resource::get_large_workspace_resource(handle), + raft::make_extents(n_rows)); + cuvs::cluster::kmeans::balanced_params kmeans_params; + kmeans_params.metric = index->metric(); + auto orig_centroids_view = raft::make_device_matrix_view( + index->centers().data_handle(), n_lists, dim); + + constexpr size_t kReasonableMaxBatchSize = 65536; + size_t max_batch_size = std::min(n_rows, kReasonableMaxBatchSize); + + auto copy_stream = raft::resource::get_cuda_stream(handle); + bool enable_prefetch = false; + if (handle.has_resource_factory(raft::resource::resource_type::CUDA_STREAM_POOL)) { + if (raft::resource::get_stream_pool_size(handle) >= 1) { + enable_prefetch = true; + copy_stream = raft::resource::get_stream_from_stream_pool(handle); + } + } + + utils::batch_load_iterator vec_batches(new_vectors, + n_rows, + index->dim(), + max_batch_size, + copy_stream, + raft::resource::get_workspace_resource(handle), + enable_prefetch); + vec_batches.prefetch_next_batch(); + + for (const auto& batch : vec_batches) { + auto batch_data_view = + raft::make_device_matrix_view(batch.data(), batch.size(), index->dim()); + auto batch_labels_view = raft::make_device_vector_view( + new_labels.data_handle() + batch.offset(), batch.size()); + cuvs::cluster::kmeans::predict( + handle, kmeans_params, batch_data_view, orig_centroids_view, batch_labels_view); + vec_batches.prefetch_next_batch(); + raft::resource::sync_stream(handle); + } + + auto* list_sizes_ptr = index->list_sizes().data_handle(); + auto old_list_sizes_dev = raft::make_device_mdarray( + handle, raft::resource::get_workspace_resource(handle), raft::make_extents(n_lists)); + raft::copy(old_list_sizes_dev.data_handle(), list_sizes_ptr, n_lists, stream); + + if (index->adaptive_centers()) { + auto centroids_view = raft::make_device_matrix_view( + index->centers().data_handle(), index->centers().extent(0), index->centers().extent(1)); + auto list_sizes_view = + raft::make_device_vector_view, int64_t>( + list_sizes_ptr, n_lists); + for (const auto& batch : vec_batches) { + auto batch_data_view = + raft::make_device_matrix_view(batch.data(), batch.size(), index->dim()); + auto batch_labels_view = raft::make_device_vector_view( + new_labels.data_handle() + batch.offset(), batch.size()); + cuvs::cluster::kmeans_balanced::helpers::calc_centers_and_sizes(handle, + batch_data_view, + batch_labels_view, + centroids_view, + list_sizes_view, + false, + utils::mapping{}); + } + } else { + raft::stats::histogram(raft::stats::HistTypeAuto, + reinterpret_cast(list_sizes_ptr), + int64_t(n_lists), + new_labels.data_handle(), + n_rows, + 1, + stream); + raft::linalg::add( + list_sizes_ptr, list_sizes_ptr, old_list_sizes_dev.data_handle(), n_lists, stream); + } + + std::vector new_list_sizes(n_lists); + std::vector old_list_sizes(n_lists); + { + raft::copy(old_list_sizes.data(), old_list_sizes_dev.data_handle(), n_lists, stream); + raft::copy(new_list_sizes.data(), list_sizes_ptr, n_lists, stream); + raft::resource::sync_stream(handle); + auto& lists = index->lists(); + for (uint32_t label = 0; label < n_lists; label++) { + ivf::resize_list(handle, + lists[label], + list_device_spec, + new_list_sizes[label], + raft::Pow2::roundUp(old_list_sizes[label])); + } + } + ivf::detail::recompute_internal_state(handle, *index); + raft::copy(list_sizes_ptr, old_list_sizes_dev.data_handle(), n_lists, stream); + + utils::batch_load_iterator vec_indices( + new_indices, n_rows, 1, max_batch_size, stream, raft::resource::get_workspace_resource(handle)); + vec_batches.reset(); + vec_batches.prefetch_next_batch(); + utils::batch_load_iterator idx_batch = vec_indices.begin(); + + auto residuals_buf = raft::make_device_vector(handle, max_batch_size * dim); + + size_t next_report_offset = 0; + size_t d_report_offset = n_rows * 5 / 100; + + for (const auto& batch : vec_batches) { + int64_t bs = batch.size(); + + { + dim3 threads(32, 8); + dim3 blocks(raft::ceildiv(bs, threads.x), raft::ceildiv(dim, threads.y)); + compute_residuals_kernel + <<>>(batch.data(), + index->centers().data_handle(), + new_labels.data_handle() + batch.offset(), + residuals_buf.data_handle(), + bs, + dim); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + + { + constexpr int kEncodeBlockSize = 256; + constexpr int kEncodeWarpsPerBlk = kEncodeBlockSize / kIndexGroupSize; + const dim3 block_dim(kEncodeBlockSize); + const dim3 grid_dim(raft::ceildiv(bs, int64_t(kEncodeWarpsPerBlk))); + encode_and_fill_kernel + <<>>(new_labels.data_handle() + batch.offset(), + residuals_buf.data_handle(), + idx_batch->data(), + index->data_ptrs().data_handle(), + index->inds_ptrs().data_handle(), + list_sizes_ptr, + index->sq_vmin().data_handle(), + index->sq_delta().data_handle(), + bs, + dim, + batch.offset()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + + vec_batches.prefetch_next_batch(); + raft::resource::sync_stream(handle); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + if (batch.offset() > next_report_offset) { + float progress = batch.offset() * 100.0f / n_rows; + RAFT_LOG_DEBUG("ivf_sq::extend added vectors %zu, %6.1f%% complete", + static_cast(batch.offset()), + progress); + next_report_offset += d_report_offset; + } + ++idx_batch; + } + + auto compute_center_norms = [&]() { + if (index->metric() == cuvs::distance::DistanceType::CosineExpanded) { + raft::linalg::rowNorm(index->center_norms()->data_handle(), + index->centers().data_handle(), + dim, + n_lists, + stream, + raft::sqrt_op{}); + } else { + raft::linalg::rowNorm( + index->center_norms()->data_handle(), index->centers().data_handle(), dim, n_lists, stream); + } + }; + + if (!index->center_norms().has_value()) { + index->allocate_center_norms(handle); + if (index->center_norms().has_value()) { compute_center_norms(); } + } else if (index->adaptive_centers()) { + compute_center_norms(); + } +} + +template +auto extend(raft::resources const& handle, + const index& orig_index, + const T* new_vectors, + const int64_t* new_indices, + int64_t n_rows) -> index +{ + auto ext_index = clone(handle, orig_index); + detail::extend(handle, &ext_index, new_vectors, new_indices, n_rows); + return ext_index; +} + +template +inline auto build(raft::resources const& handle, + const index_params& params, + const T* dataset, + int64_t n_rows, + uint32_t dim) -> index +{ + auto stream = raft::resource::get_cuda_stream(handle); + cuvs::common::nvtx::range fun_scope( + "ivf_sq::build(%zu, %u)", size_t(n_rows), dim); + static_assert(std::is_same_v || std::is_same_v, "unsupported data type"); + RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset"); + RAFT_EXPECTS(n_rows >= params.n_lists, "number of rows can't be less than n_lists"); + RAFT_EXPECTS(params.metric != cuvs::distance::DistanceType::CosineExpanded || dim > 1, + "Cosine metric requires more than one dim"); + + index idx(handle, params, dim); + utils::memzero(idx.accum_sorted_sizes().data_handle(), idx.accum_sorted_sizes().size(), stream); + utils::memzero(idx.list_sizes().data_handle(), idx.list_sizes().size(), stream); + utils::memzero(idx.data_ptrs().data_handle(), idx.data_ptrs().size(), stream); + utils::memzero(idx.inds_ptrs().data_handle(), idx.inds_ptrs().size(), stream); + + // Train k-means centroids and SQ parameters on the same training subset. + // This mirrors IVF-PQ, which also trains its codebook on a subset of the data. + { + auto trainset_ratio = std::max( + 1, n_rows / std::max(params.kmeans_trainset_fraction * n_rows, idx.n_lists())); + auto n_rows_train = n_rows / trainset_ratio; + rmm::device_uvector trainset( + n_rows_train * idx.dim(), stream, raft::resource::get_large_workspace_resource(handle)); + RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset.data(), + sizeof(T) * idx.dim(), + dataset, + sizeof(T) * idx.dim() * trainset_ratio, + sizeof(T) * idx.dim(), + n_rows_train, + cudaMemcpyDefault, + stream)); + auto trainset_const_view = + raft::make_device_matrix_view(trainset.data(), n_rows_train, idx.dim()); + auto centers_view = raft::make_device_matrix_view( + idx.centers().data_handle(), idx.n_lists(), idx.dim()); + cuvs::cluster::kmeans::balanced_params kmeans_params; + kmeans_params.n_iters = params.kmeans_n_iters; + kmeans_params.metric = idx.metric(); + cuvs::cluster::kmeans::fit(handle, kmeans_params, trainset_const_view, centers_view); + raft::resource::sync_stream(handle); + + // Train SQ: predict labels for the training subset, compute its residuals, + // and derive per-dimension vmin/delta from them. + auto train_labels = raft::make_device_vector(handle, n_rows_train); + { + cuvs::cluster::kmeans::balanced_params pred_params; + pred_params.metric = idx.metric(); + auto centers_const_view = raft::make_device_matrix_view( + idx.centers().data_handle(), idx.n_lists(), dim); + cuvs::cluster::kmeans::predict( + handle, pred_params, trainset_const_view, centers_const_view, train_labels.view()); + raft::resource::sync_stream(handle); + } + + rmm::device_uvector residuals( + n_rows_train * dim, stream, raft::resource::get_large_workspace_resource(handle)); + { + dim3 threads(32, 8); + dim3 blocks(raft::ceildiv(n_rows_train, threads.x), + raft::ceildiv(dim, threads.y)); + compute_residuals_kernel<<>>(trainset.data(), + idx.centers().data_handle(), + train_labels.data_handle(), + residuals.data(), + n_rows_train, + dim); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + + { + auto vmax_buf = raft::make_device_vector(handle, dim); + auto* vmin_ptr = idx.sq_vmin().data_handle(); + auto* vmax_ptr = vmax_buf.data_handle(); + + constexpr int kMinMaxBlockSize = 256; + fused_column_minmax_kernel<<>>( + residuals.data(), vmin_ptr, vmax_ptr, n_rows_train, dim); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + // Expand the observed range by a small margin to reduce clipping on unseen data, + // since the SQ parameters are trained on a subset rather than the full dataset. + constexpr float kMargin = 0.05f; + auto* delta_ptr = idx.sq_delta().data_handle(); + raft::linalg::map_offset( + handle, idx.sq_vmin(), [vmin_ptr, vmax_ptr, delta_ptr, kMargin] __device__(uint32_t j) { + float range = vmax_ptr[j] - vmin_ptr[j]; + float margin = range * kMargin; + delta_ptr[j] = (range > 0.0f) ? (range + 2.0f * margin) / 255.0f : 1.0f; + return vmin_ptr[j] - margin; + }); + } + } + + if (params.add_data_on_build) { detail::extend(handle, &idx, dataset, nullptr, n_rows); } + + return idx; +} + +template +auto build(raft::resources const& handle, + const index_params& params, + raft::device_matrix_view dataset) -> index +{ + int64_t n_rows = dataset.extent(0); + uint32_t dim = dataset.extent(1); + return build(handle, params, dataset.data_handle(), n_rows, dim); +} + +template +auto build(raft::resources const& handle, + const index_params& params, + raft::host_matrix_view dataset) -> index +{ + int64_t n_rows = dataset.extent(0); + uint32_t dim = dataset.extent(1); + return build(handle, params, dataset.data_handle(), n_rows, dim); +} + +template +void build(raft::resources const& handle, + const index_params& params, + raft::device_matrix_view dataset, + index& idx) +{ + idx = build(handle, params, dataset); +} + +template +void build(raft::resources const& handle, + const index_params& params, + raft::host_matrix_view dataset, + index& idx) +{ + idx = build(handle, params, dataset); +} + +template +auto extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + const index& orig_index) -> index +{ + RAFT_EXPECTS(new_vectors.extent(1) == orig_index.dim(), + "new_vectors should have the same dimension as the index"); + if (new_indices.has_value()) { + RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0), + "new_vectors and new_indices have different number of rows"); + } + int64_t n_rows = new_vectors.extent(0); + return extend(handle, + orig_index, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + n_rows); +} + +template +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const index& orig_index) -> index +{ + RAFT_EXPECTS(new_vectors.extent(1) == orig_index.dim(), + "new_vectors should have the same dimension as the index"); + if (new_indices.has_value()) { + RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0), + "new_vectors and new_indices have different number of rows"); + } + int64_t n_rows = new_vectors.extent(0); + return extend(handle, + orig_index, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + n_rows); +} + +template +void extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + index* idx) +{ + RAFT_EXPECTS(new_vectors.extent(1) == idx->dim(), + "new_vectors should have the same dimension as the index"); + if (new_indices.has_value()) { + RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0), + "new_vectors and new_indices have different number of rows"); + } + detail::extend(handle, + idx, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + new_vectors.extent(0)); +} + +template +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + index* idx) +{ + RAFT_EXPECTS(new_vectors.extent(1) == idx->dim(), + "new_vectors should have the same dimension as the index"); + if (new_indices.has_value()) { + RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0), + "new_vectors and new_indices have different number of rows"); + } + detail::extend(handle, + idx, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + new_vectors.extent(0)); +} + +} // namespace detail +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu new file mode 100644 index 0000000000..a97aebb11c --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu @@ -0,0 +1,89 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "ivf_sq_build.cuh" + +namespace cuvs::neighbors::ivf_sq { + +#define CUVS_INST_IVF_SQ_BUILD_EXTEND(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::device_matrix_view dataset) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_sq::index& idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset, idx); \ + } \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::host_matrix_view dataset) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_sq::index& idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset, idx); \ + } \ + \ + auto extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_sq::index& orig_index) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::extend( \ + handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_sq::index* idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_sq::index& orig_index) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::extend( \ + handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_sq::index* idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::extend(handle, new_vectors, new_indices, idx); \ + } + +CUVS_INST_IVF_SQ_BUILD_EXTEND(float, uint8_t); + +#undef CUVS_INST_IVF_SQ_BUILD_EXTEND + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu new file mode 100644 index 0000000000..9148e5c328 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu @@ -0,0 +1,89 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "ivf_sq_build.cuh" + +namespace cuvs::neighbors::ivf_sq { + +#define CUVS_INST_IVF_SQ_BUILD_EXTEND(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::device_matrix_view dataset) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_sq::index& idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset, idx); \ + } \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::host_matrix_view dataset) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_sq::index& idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset, idx); \ + } \ + \ + auto extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_sq::index& orig_index) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::extend( \ + handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_sq::index* idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_sq::index& orig_index) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::extend( \ + handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_sq::index* idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::extend(handle, new_vectors, new_indices, idx); \ + } + +CUVS_INST_IVF_SQ_BUILD_EXTEND(half, uint8_t); + +#undef CUVS_INST_IVF_SQ_BUILD_EXTEND + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh b/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh new file mode 100644 index 0000000000..39c653b048 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh @@ -0,0 +1,549 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../../core/nvtx.hpp" +#include "../detail/ann_utils.cuh" +#include "../ivf_common.cuh" +#include "../sample_filter.cuh" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace cuvs::neighbors::ivf_sq::detail { + +using namespace cuvs::spatial::knn::detail; // NOLINT + +enum class SqScanMetric { kL2, kIP, kCosine }; + +/** + * Per-probe scan kernel for IVF-SQ search. + * + * Grid: (n_queries, n_probes). Each block handles one (query, probe) pair. + * Within a block, each warp processes one interleaved group of kIndexGroupSize + * (=32) vectors at a time, with each lane responsible for one vector. + * Dimension blocks of veclen=16 bytes are loaded as coalesced uint4 reads + * across the warp (32 lanes x 16 bytes = 512 bytes = 4 cache lines), giving + * full memory-bandwidth utilisation. + * + * Per-dimension constants that are invariant across rows are precomputed into + * shared memory so the hot loop only reads from smem + one uint4 per dim-block: + * + * L2 / L2Sqrt: + * s_query_term[d] = query[d] - centroid[d] - sq_vmin[d] + * dist += (s_query_term[d] - code * s_sq_scale[d])^2 + * + * InnerProduct / Cosine: + * s_query_term[d] = query[d] + * s_recon_base[d] = centroid[d] + sq_vmin[d] + * v_d = s_recon_base[d] + code * s_sq_scale[d] + * dist += s_query_term[d] * v_d + * + * Shared-memory layout adapts to the metric to avoid waste: + * L2 / L2Sqrt : [s_query_term | s_sq_scale] (2 * dim floats) + * InnerProduct/Cosine: [s_query_term | s_recon_base | s_sq_scale] (3 * dim floats) + */ +template +__launch_bounds__(BlockDim) RAFT_KERNEL ivf_sq_scan_kernel(const uint8_t* const* data_ptrs, + const uint32_t* list_sizes, + const uint32_t* coarse_indices, + const float* queries_float, + const float* centers, + const float* sq_vmin, + const float* sq_delta, + const float* query_norms, + uint32_t n_probes, + uint32_t dim, + uint32_t max_samples, + const uint32_t* chunk_indices, + float* out_distances, + uint32_t* out_indices, + IvfSampleFilterT sample_filter) +{ + static_assert(kIndexGroupSize == raft::WarpSize, + "Warp-coalesced scan requires kIndexGroupSize == WarpSize"); + + extern __shared__ float smem[]; + + constexpr bool kIsL2 = (Metric == SqScanMetric::kL2); + constexpr bool kIsCosine = (Metric == SqScanMetric::kCosine); + + float* s_query_term = smem; + float* s_recon_base = smem + dim; + float* s_sq_scale = kIsL2 ? (smem + dim) : (smem + 2 * dim); + + const uint32_t query_ix = blockIdx.x; + const uint32_t probe_ix = blockIdx.y; + + const uint32_t* my_coarse = coarse_indices + query_ix * n_probes; + const uint32_t cluster_id = my_coarse[probe_ix]; + const uint32_t cluster_sz = list_sizes[cluster_id]; + if (cluster_sz == 0) return; + + const uint8_t* codes = data_ptrs[cluster_id]; + const float* query = queries_float + query_ix * dim; + const float* centroid = centers + cluster_id * dim; + + for (uint32_t d = threadIdx.x; d < dim; d += BlockDim) { + float vmin_d = sq_vmin[d]; + s_sq_scale[d] = sq_delta[d]; + if constexpr (kIsL2) { + s_query_term[d] = query[d] - centroid[d] - vmin_d; + } else { + s_query_term[d] = query[d]; + s_recon_base[d] = centroid[d] + vmin_d; + } + } + __syncthreads(); + + const uint32_t* my_chunk = chunk_indices + query_ix * n_probes; + uint32_t out_base = (probe_ix > 0) ? my_chunk[probe_ix - 1] : 0; + + constexpr uint32_t veclen = 16; + constexpr uint32_t kWarpsPerBlock = BlockDim / raft::WarpSize; + const uint32_t warp_id = threadIdx.x / raft::WarpSize; + const uint32_t lane_id = threadIdx.x % raft::WarpSize; + + uint32_t padded_dim = ((dim + veclen - 1) / veclen) * veclen; + uint32_t n_dim_blocks = padded_dim / veclen; + + for (uint32_t group = warp_id * kIndexGroupSize; group < cluster_sz; + group += kWarpsPerBlock * kIndexGroupSize) { + const uint32_t row = group + lane_id; + const bool valid = (row < cluster_sz) && sample_filter(query_ix, cluster_id, row); + + float dist = 0.0f; + float v_norm_sq = 0.0f; + + const uint8_t* group_data = codes + size_t(group) * padded_dim; + + for (uint32_t bl = 0; bl < n_dim_blocks; bl++) { + uint8_t codes_local[veclen]; + *reinterpret_cast(codes_local) = *reinterpret_cast( + group_data + bl * (veclen * kIndexGroupSize) + lane_id * veclen); + + const uint32_t l = bl * veclen; +#pragma unroll + for (uint32_t j = 0; j < veclen; j++) { + if (l + j < dim) { + float recon = float(codes_local[j]) * s_sq_scale[l + j]; + + if constexpr (kIsL2) { + float diff = s_query_term[l + j] - recon; + dist += diff * diff; + } else { + float v_d = s_recon_base[l + j] + recon; + dist += s_query_term[l + j] * v_d; + if constexpr (kIsCosine) { v_norm_sq += v_d * v_d; } + } + } + } + } + + if constexpr (kIsCosine) { + float denom = query_norms[query_ix] * sqrtf(v_norm_sq); + dist = (denom > 0.0f) ? 1.0f - dist / denom : 0.0f; + } + + if (valid) { + uint32_t out_idx = query_ix * max_samples + out_base + row; + out_distances[out_idx] = dist; + out_indices[out_idx] = out_base + row; + } + } +} + +template +void ivf_sq_scan(raft::resources const& handle, + const index& idx, + const float* queries_float, + const float* query_norms, + uint32_t n_queries, + uint32_t n_probes, + uint32_t max_samples, + const uint32_t* coarse_indices, + const uint32_t* chunk_indices, + float* out_distances, + uint32_t* out_indices, + IvfSampleFilterT sample_filter, + rmm::cuda_stream_view stream) +{ + constexpr int kThreads = 256; + dim3 grid(n_queries, n_probes); + dim3 block(kThreads); + uint32_t dim = idx.dim(); + + auto do_launch = [&](auto kernel_ptr, size_t smem) { + RAFT_CUDA_TRY( + cudaFuncSetAttribute(kernel_ptr, cudaFuncAttributeMaxDynamicSharedMemorySize, smem)); + kernel_ptr<<>>(idx.data_ptrs().data_handle(), + idx.list_sizes().data_handle(), + coarse_indices, + queries_float, + idx.centers().data_handle(), + idx.sq_vmin().data_handle(), + idx.sq_delta().data_handle(), + query_norms, + n_probes, + dim, + max_samples, + chunk_indices, + out_distances, + out_indices, + sample_filter); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + }; + + switch (idx.metric()) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2SqrtExpanded: + do_launch(ivf_sq_scan_kernel, + 2 * dim * sizeof(float)); + break; + case cuvs::distance::DistanceType::InnerProduct: + do_launch(ivf_sq_scan_kernel, + 3 * dim * sizeof(float)); + break; + case cuvs::distance::DistanceType::CosineExpanded: + do_launch(ivf_sq_scan_kernel, + 3 * dim * sizeof(float)); + break; + default: RAFT_FAIL("Unsupported metric type for IVF-SQ scan."); + } +} + +template +void search_impl(raft::resources const& handle, + const index& index, + const T* queries, + uint32_t n_queries, + uint32_t k, + uint32_t n_probes, + bool select_min, + int64_t* neighbors, + float* distances, + rmm::device_async_resource_ref search_mr, + IvfSampleFilterT sample_filter) +{ + auto stream = raft::resource::get_cuda_stream(handle); + auto dim = index.dim(); + + std::size_t n_queries_probes = std::size_t(n_queries) * std::size_t(n_probes); + + rmm::device_uvector query_norm_dev(n_queries, stream, search_mr); + rmm::device_uvector distance_buffer_dev(n_queries * index.n_lists(), stream, search_mr); + rmm::device_uvector coarse_distances_dev(n_queries_probes, stream, search_mr); + rmm::device_uvector coarse_indices_dev(n_queries_probes, stream, search_mr); + + size_t float_query_size; + if constexpr (std::is_same_v) { + float_query_size = 0; + } else { + float_query_size = n_queries * dim; + } + rmm::device_uvector converted_queries_dev(float_query_size, stream, search_mr); + float* converted_queries_ptr = converted_queries_dev.data(); + + if constexpr (std::is_same_v) { + converted_queries_ptr = const_cast(queries); + } else { + raft::linalg::unaryOp( + converted_queries_ptr, queries, n_queries * dim, utils::mapping{}, stream); + } + + auto distance_buffer_dev_view = raft::make_device_matrix_view( + distance_buffer_dev.data(), n_queries, index.n_lists()); + + RAFT_EXPECTS(index.metric() == cuvs::distance::DistanceType::InnerProduct || + index.center_norms().has_value(), + "Center norms are required for search with L2 or Cosine metric. " + "Rebuild the index with add_data_on_build=true or call extend() first."); + + float alpha = 1.0f; + float beta = 0.0f; + switch (index.metric()) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2SqrtExpanded: { + alpha = -2.0f; + beta = 1.0f; + raft::linalg::rowNorm(query_norm_dev.data(), + converted_queries_ptr, + static_cast(dim), + static_cast(n_queries), + stream); + utils::outer_add(query_norm_dev.data(), + (int64_t)n_queries, + index.center_norms()->data_handle(), + (int64_t)index.n_lists(), + distance_buffer_dev.data(), + stream); + break; + } + case cuvs::distance::DistanceType::CosineExpanded: { + raft::linalg::rowNorm(query_norm_dev.data(), + converted_queries_ptr, + static_cast(dim), + static_cast(n_queries), + stream, + raft::sqrt_op{}); + alpha = -1.0f; + beta = 0.0f; + break; + } + case cuvs::distance::DistanceType::InnerProduct: { + alpha = 1.0f; + beta = 0.0f; + break; + } + default: RAFT_FAIL("Unsupported metric type for IVF-SQ search."); + } + + raft::linalg::gemm(handle, + true, + false, + index.n_lists(), + n_queries, + dim, + &alpha, + index.centers().data_handle(), + dim, + converted_queries_ptr, + dim, + &beta, + distance_buffer_dev.data(), + index.n_lists(), + stream); + + if (index.metric() == cuvs::distance::DistanceType::CosineExpanded) { + auto n_lists_local = index.n_lists(); + const auto* q_norm_ptr = query_norm_dev.data(); + const auto* center_norm_ptr = index.center_norms()->data_handle(); + raft::linalg::map_offset( + handle, + distance_buffer_dev_view, + [=] __device__(const uint32_t idx, const float dist) { + const auto query = idx / n_lists_local; + const auto cluster = idx % n_lists_local; + float denom = q_norm_ptr[query] * center_norm_ptr[cluster]; + return (denom > 0.0f) ? dist / denom : 0.0f; + }, + raft::make_const_mdspan(distance_buffer_dev_view)); + } + + cuvs::selection::select_k( + handle, + raft::make_const_mdspan(distance_buffer_dev_view), + std::nullopt, + raft::make_device_matrix_view(coarse_distances_dev.data(), n_queries, n_probes), + raft::make_device_matrix_view( + coarse_indices_dev.data(), n_queries, n_probes), + select_min); + + rmm::device_uvector num_samples(n_queries, stream, search_mr); + rmm::device_uvector chunk_index(n_queries_probes, stream, search_mr); + + ivf::detail::calc_chunk_indices::configure(n_probes, n_queries)(index.list_sizes().data_handle(), + coarse_indices_dev.data(), + chunk_index.data(), + num_samples.data(), + stream); + + uint32_t max_samples = + std::max(static_cast(index.accum_sorted_sizes()(n_probes)), k); + + rmm::device_uvector all_distances(std::size_t(n_queries) * max_samples, stream, search_mr); + rmm::device_uvector all_indices( + std::size_t(n_queries) * max_samples, stream, search_mr); + + float init_val = + select_min ? std::numeric_limits::max() : std::numeric_limits::lowest(); + thrust::fill_n(raft::resource::get_thrust_policy(handle), + all_distances.data(), + std::size_t(n_queries) * max_samples, + init_val); + thrust::fill_n(raft::resource::get_thrust_policy(handle), + all_indices.data(), + std::size_t(n_queries) * max_samples, + uint32_t(0xFFFFFFFF)); + + auto filter_adapter = cuvs::neighbors::filtering::ivf_to_sample_filter( + index.inds_ptrs().data_handle(), sample_filter); + + ivf_sq_scan(handle, + index, + converted_queries_ptr, + query_norm_dev.data(), + n_queries, + n_probes, + max_samples, + coarse_indices_dev.data(), + chunk_index.data(), + all_distances.data(), + all_indices.data(), + filter_adapter, + stream); + + rmm::device_uvector neighbors_uint32(0, stream, search_mr); + uint32_t* neighbors_uint32_ptr = nullptr; + if constexpr (sizeof(int64_t) == sizeof(uint32_t)) { + neighbors_uint32_ptr = reinterpret_cast(neighbors); + } else { + neighbors_uint32.resize(std::size_t(n_queries) * k, stream); + neighbors_uint32_ptr = neighbors_uint32.data(); + } + + auto num_samples_view = + raft::make_device_vector_view(num_samples.data(), n_queries); + + cuvs::selection::select_k( + handle, + raft::make_device_matrix_view( + all_distances.data(), n_queries, max_samples), + raft::make_device_matrix_view( + all_indices.data(), n_queries, max_samples), + raft::make_device_matrix_view(distances, n_queries, k), + raft::make_device_matrix_view(neighbors_uint32_ptr, n_queries, k), + select_min, + false, + cuvs::selection::SelectAlgo::kAuto, + num_samples_view); + + ivf::detail::postprocess_distances( + distances, distances, index.metric(), n_queries, k, 1.0, false, stream); + + ivf::detail::postprocess_neighbors(neighbors, + neighbors_uint32_ptr, + index.inds_ptrs().data_handle(), + coarse_indices_dev.data(), + chunk_index.data(), + n_queries, + n_probes, + k, + stream); +} + +template +inline void search_with_filtering(raft::resources const& handle, + const search_params& params, + const index& index, + const T* queries, + uint32_t n_queries, + uint32_t k, + int64_t* neighbors, + float* distances, + IvfSampleFilterT sample_filter = IvfSampleFilterT()) +{ + cuvs::common::nvtx::range fun_scope( + "ivf_sq::search(k = %u, n_queries = %u, dim = %zu)", k, n_queries, index.dim()); + + RAFT_EXPECTS(params.n_probes > 0, + "n_probes (number of clusters to probe in the search) must be positive."); + auto n_probes = std::min(params.n_probes, index.n_lists()); + + uint32_t max_samples = + std::max(static_cast(index.accum_sorted_sizes()(n_probes)), k); + + constexpr uint64_t kExpectedWsSize = 1024ull * 1024 * 1024; + uint64_t max_ws_size = + std::min(raft::resource::get_workspace_free_bytes(handle), kExpectedWsSize); + + uint64_t converted_query_floats = std::is_same_v ? 0 : index.dim(); + uint64_t ws_per_query = sizeof(float) * (uint64_t(index.n_lists()) + n_probes + 1 + max_samples + + converted_query_floats) + + sizeof(uint32_t) * (uint64_t(n_probes) * 2 + 1 + max_samples + k); + + const uint32_t max_queries = + std::min(n_queries, std::max(1, max_ws_size / ws_per_query)); + + for (uint32_t offset_q = 0; offset_q < n_queries; offset_q += max_queries) { + uint32_t queries_batch = std::min(max_queries, n_queries - offset_q); + + search_impl(handle, + index, + queries + std::size_t(offset_q) * index.dim(), + queries_batch, + k, + n_probes, + cuvs::distance::is_min_close(index.metric()), + neighbors + std::size_t(offset_q) * k, + distances + std::size_t(offset_q) * k, + raft::resource::get_workspace_resource(handle), + sample_filter); + } +} + +template +void search_with_filtering(raft::resources const& handle, + const search_params& params, + const index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + IvfSampleFilterT sample_filter = IvfSampleFilterT()) +{ + RAFT_EXPECTS( + queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0), + "Number of rows in output neighbors and distances matrices must equal the number of queries."); + RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1), + "Number of columns in output neighbors and distances matrices must be equal"); + RAFT_EXPECTS(queries.extent(1) == index.dim(), + "Number of query dimensions should equal number of dimensions in the index."); + + search_with_filtering(handle, + params, + index, + queries.data_handle(), + static_cast(queries.extent(0)), + static_cast(neighbors.extent(1)), + neighbors.data_handle(), + distances.data_handle(), + sample_filter); +} + +template +void search(raft::resources const& handle, + const search_params& params, + const index& idx, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter_ref) +{ + try { + auto& sample_filter = + dynamic_cast(sample_filter_ref); + return search_with_filtering(handle, params, idx, queries, neighbors, distances, sample_filter); + } catch (const std::bad_cast&) { + } + + try { + auto& sample_filter = + dynamic_cast&>( + sample_filter_ref); + return search_with_filtering(handle, params, idx, queries, neighbors, distances, sample_filter); + } catch (const std::bad_cast&) { + RAFT_FAIL("Unsupported sample filter type"); + } +} + +} // namespace cuvs::neighbors::ivf_sq::detail diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu new file mode 100644 index 0000000000..60d95a153f --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "ivf_sq_search.cuh" + +namespace cuvs::neighbors::ivf_sq { + +#define CUVS_INST_IVF_SQ_SEARCH(T, IdxT) \ + void search(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::search_params& params, \ + const cuvs::neighbors::ivf_sq::index& index, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + cuvs::neighbors::ivf_sq::detail::search( \ + handle, params, index, queries, neighbors, distances, sample_filter); \ + } + +CUVS_INST_IVF_SQ_SEARCH(float, uint8_t); + +#undef CUVS_INST_IVF_SQ_SEARCH + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu new file mode 100644 index 0000000000..fbed3fd432 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "ivf_sq_search.cuh" + +namespace cuvs::neighbors::ivf_sq { + +#define CUVS_INST_IVF_SQ_SEARCH(T, IdxT) \ + void search(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::search_params& params, \ + const cuvs::neighbors::ivf_sq::index& index, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + cuvs::neighbors::ivf_sq::detail::search( \ + handle, params, index, queries, neighbors, distances, sample_filter); \ + } + +CUVS_INST_IVF_SQ_SEARCH(half, uint8_t); + +#undef CUVS_INST_IVF_SQ_SEARCH + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh new file mode 100644 index 0000000000..b95e63ee33 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh @@ -0,0 +1,161 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../ivf_common.cuh" +#include "../ivf_list.cuh" +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace cuvs::neighbors::ivf_sq::detail { + +constexpr int serialization_version = 1; + +template +void serialize(raft::resources const& handle, std::ostream& os, const index& index_) +{ + RAFT_LOG_DEBUG( + "Saving IVF-SQ index, size %zu, dim %u", static_cast(index_.size()), index_.dim()); + + std::string dtype_string = raft::detail::numpy_serializer::get_numpy_dtype().to_string(); + dtype_string.resize(4); + os << dtype_string; + + serialize_scalar(handle, os, serialization_version); + serialize_scalar(handle, os, index_.size()); + serialize_scalar(handle, os, index_.dim()); + serialize_scalar(handle, os, index_.n_lists()); + serialize_scalar(handle, os, index_.metric()); + serialize_scalar(handle, os, index_.adaptive_centers()); + serialize_scalar(handle, os, index_.conservative_memory_allocation()); + serialize_mdspan(handle, os, index_.centers()); + + if (index_.center_norms()) { + bool has_norms = true; + serialize_scalar(handle, os, has_norms); + serialize_mdspan(handle, os, *index_.center_norms()); + } else { + bool has_norms = false; + serialize_scalar(handle, os, has_norms); + } + + serialize_mdspan(handle, os, index_.sq_vmin()); + serialize_mdspan(handle, os, index_.sq_delta()); + + auto sizes_host = raft::make_host_vector(index_.list_sizes().extent(0)); + raft::copy(sizes_host.data_handle(), + index_.list_sizes().data_handle(), + sizes_host.size(), + raft::resource::get_cuda_stream(handle)); + raft::resource::sync_stream(handle); + serialize_mdspan(handle, os, sizes_host.view()); + + list_spec list_store_spec{index_.dim(), true}; + for (uint32_t label = 0; label < index_.n_lists(); label++) { + ivf::serialize_list(handle, + os, + index_.lists()[label], + list_store_spec, + raft::Pow2::roundUp(sizes_host(label))); + } + raft::resource::sync_stream(handle); +} + +template +void serialize(raft::resources const& handle, + const std::string& filename, + const index& index_) +{ + std::ofstream of(filename, std::ios::out | std::ios::binary); + if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + detail::serialize(handle, of, index_); + of.close(); + if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); } +} + +template +auto deserialize(raft::resources const& handle, std::istream& is) -> index +{ + char dtype_string[4]; + is.read(dtype_string, 4); + + auto ver = raft::deserialize_scalar(handle, is); + if (ver != serialization_version) { + RAFT_FAIL("serialization version mismatch, expected %d, got %d ", serialization_version, ver); + } + auto n_rows = raft::deserialize_scalar(handle, is); + auto dim = raft::deserialize_scalar(handle, is); + auto n_lists = raft::deserialize_scalar(handle, is); + auto metric = raft::deserialize_scalar(handle, is); + bool adaptive_centers = raft::deserialize_scalar(handle, is); + bool cma = raft::deserialize_scalar(handle, is); + + index index_ = index(handle, metric, n_lists, dim, adaptive_centers, cma); + + deserialize_mdspan(handle, is, index_.centers()); + + bool has_norms = raft::deserialize_scalar(handle, is); + if (has_norms) { + index_.allocate_center_norms(handle); + if (!index_.center_norms()) { + RAFT_FAIL("Error inconsistent center norms"); + } else { + auto center_norms = index_.center_norms().value(); + deserialize_mdspan(handle, is, center_norms); + } + } + + deserialize_mdspan(handle, is, index_.sq_vmin()); + deserialize_mdspan(handle, is, index_.sq_delta()); + + deserialize_mdspan(handle, is, index_.list_sizes()); + + list_spec list_device_spec{index_.dim(), cma}; + list_spec list_store_spec{index_.dim(), true}; + for (uint32_t label = 0; label < index_.n_lists(); label++) { + ivf::deserialize_list(handle, is, index_.lists()[label], list_store_spec, list_device_spec); + } + raft::resource::sync_stream(handle); + + ivf::detail::recompute_internal_state(handle, index_); + + return index_; +} + +template +auto deserialize(raft::resources const& handle, const std::string& filename) -> index +{ + std::ifstream is(filename, std::ios::in | std::ios::binary); + if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + auto index = detail::deserialize(handle, is); + is.close(); + return index; +} + +} // namespace cuvs::neighbors::ivf_sq::detail + +#define CUVS_INST_IVF_SQ_SERIALIZE(IdxT) \ + void serialize(raft::resources const& handle, \ + const std::string& filename, \ + const cuvs::neighbors::ivf_sq::index& index) \ + { \ + cuvs::neighbors::ivf_sq::detail::serialize(handle, filename, index); \ + } \ + \ + void deserialize(raft::resources const& handle, \ + const std::string& filename, \ + cuvs::neighbors::ivf_sq::index* index) \ + { \ + *index = cuvs::neighbors::ivf_sq::detail::deserialize(handle, filename); \ + } diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu new file mode 100644 index 0000000000..c2351ed8c3 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu @@ -0,0 +1,16 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "ivf_sq_serialize.cuh" + +namespace cuvs::neighbors::ivf_sq { + +CUVS_INST_IVF_SQ_SERIALIZE(uint8_t); + +#undef CUVS_INST_IVF_SQ_SERIALIZE + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq_index.cpp b/cpp/src/neighbors/ivf_sq_index.cpp new file mode 100644 index 0000000000..d97ace7dcb --- /dev/null +++ b/cpp/src/neighbors/ivf_sq_index.cpp @@ -0,0 +1,236 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +namespace cuvs::neighbors::ivf_sq { + +template +index::index(raft::resources const& res) + : index(res, cuvs::distance::DistanceType::L2Expanded, 0, 0, false, false) +{ +} + +template +index::index(raft::resources const& res, const index_params& params, uint32_t dim) + : index(res, + params.metric, + params.n_lists, + dim, + params.adaptive_centers, + params.conservative_memory_allocation) +{ +} + +template +index::index(raft::resources const& res, + cuvs::distance::DistanceType metric, + uint32_t n_lists, + uint32_t dim, + bool adaptive_centers, + bool conservative_memory_allocation) + : cuvs::neighbors::index(), + metric_(metric), + adaptive_centers_(adaptive_centers), + conservative_memory_allocation_(conservative_memory_allocation), + lists_{n_lists}, + list_sizes_{raft::make_device_vector(res, n_lists)}, + centers_(raft::make_device_matrix(res, n_lists, dim)), + center_norms_(std::nullopt), + sq_vmin_{raft::make_device_vector(res, dim)}, + sq_delta_{raft::make_device_vector(res, dim)}, + data_ptrs_{raft::make_device_vector(res, n_lists)}, + inds_ptrs_{raft::make_device_vector(res, n_lists)}, + accum_sorted_sizes_{raft::make_host_vector(n_lists + 1)} +{ + check_consistency(); + accum_sorted_sizes_(n_lists) = 0; +} + +template +cuvs::distance::DistanceType index::metric() const noexcept +{ + return metric_; +} + +template +bool index::adaptive_centers() const noexcept +{ + return adaptive_centers_; +} + +template +int64_t index::size() const noexcept +{ + return accum_sorted_sizes()(n_lists()); +} + +template +uint32_t index::dim() const noexcept +{ + return centers_.extent(1); +} + +template +uint32_t index::n_lists() const noexcept +{ + return lists_.size(); +} + +template +bool index::conservative_memory_allocation() const noexcept +{ + return conservative_memory_allocation_; +} + +template +raft::device_vector_view index::list_sizes() noexcept +{ + return list_sizes_.view(); +} + +template +raft::device_vector_view index::list_sizes() const noexcept +{ + return list_sizes_.view(); +} + +template +raft::device_matrix_view index::centers() noexcept +{ + return centers_.view(); +} + +template +raft::device_matrix_view index::centers() + const noexcept +{ + return centers_.view(); +} + +template +std::optional> index::center_norms() noexcept +{ + if (center_norms_.has_value()) { + return std::make_optional>(center_norms_->view()); + } else { + return std::nullopt; + } +} + +template +std::optional> index::center_norms() + const noexcept +{ + if (center_norms_.has_value()) { + return std::make_optional>( + center_norms_->view()); + } else { + return std::nullopt; + } +} + +template +void index::allocate_center_norms(raft::resources const& res) +{ + switch (metric_) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2SqrtExpanded: + case cuvs::distance::DistanceType::L2Unexpanded: + case cuvs::distance::DistanceType::L2SqrtUnexpanded: + case cuvs::distance::DistanceType::CosineExpanded: + center_norms_ = raft::make_device_vector(res, n_lists()); + break; + default: center_norms_ = std::nullopt; + } +} + +template +raft::device_vector_view index::sq_vmin() noexcept +{ + return sq_vmin_.view(); +} + +template +raft::device_vector_view index::sq_vmin() const noexcept +{ + return sq_vmin_.view(); +} + +template +raft::device_vector_view index::sq_delta() noexcept +{ + return sq_delta_.view(); +} + +template +raft::device_vector_view index::sq_delta() const noexcept +{ + return sq_delta_.view(); +} + +template +raft::host_vector_view index::accum_sorted_sizes() noexcept +{ + return accum_sorted_sizes_.view(); +} + +template +raft::host_vector_view index::accum_sorted_sizes() const noexcept +{ + return accum_sorted_sizes_.view(); +} + +template +raft::device_vector_view index::data_ptrs() noexcept +{ + return data_ptrs_.view(); +} + +template +raft::device_vector_view index::data_ptrs() const noexcept +{ + return data_ptrs_.view(); +} + +template +raft::device_vector_view index::inds_ptrs() noexcept +{ + return inds_ptrs_.view(); +} + +template +raft::device_vector_view index::inds_ptrs() const noexcept +{ + return inds_ptrs_.view(); +} + +template +std::vector>>& index::lists() noexcept +{ + return lists_; +} + +template +const std::vector>>& index::lists() const noexcept +{ + return lists_; +} + +template +void index::check_consistency() +{ + auto n_lists = lists_.size(); + RAFT_EXPECTS(list_sizes_.extent(0) == n_lists, "inconsistent list size"); + RAFT_EXPECTS(data_ptrs_.extent(0) == n_lists, "inconsistent list size"); + RAFT_EXPECTS(inds_ptrs_.extent(0) == n_lists, "inconsistent list size"); + RAFT_EXPECTS((centers_.extent(0) == list_sizes_.extent(0)) && + (!center_norms_.has_value() || centers_.extent(0) == center_norms_->extent(0)), + "inconsistent number of lists (clusters)"); +} + +template struct index; + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 35794adf9b..208c330de7 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -131,6 +131,13 @@ ConfigureTest( PERCENT 100 ) +ConfigureTest( + NAME NEIGHBORS_ANN_IVF_SQ_TEST + PATH neighbors/ann_ivf_sq/test_float_uint8_t.cu + GPUS 1 + PERCENT 100 +) + ConfigureTest( NAME NEIGHBORS_ANN_IVF_PQ_TEST PATH neighbors/ann_ivf_pq/test_float_int64_t.cu neighbors/ann_ivf_pq/test_int8_t_int64_t.cu diff --git a/cpp/tests/neighbors/ann_ivf_sq.cuh b/cpp/tests/neighbors/ann_ivf_sq.cuh new file mode 100644 index 0000000000..a7e02315e4 --- /dev/null +++ b/cpp/tests/neighbors/ann_ivf_sq.cuh @@ -0,0 +1,457 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include "../test_utils.cuh" +#include "ann_utils.cuh" +#include "naive_knn.cuh" + +#include +#include +#include +#include + +#include +#include + +namespace cuvs::neighbors::ivf_sq { + +struct test_ivf_sample_filter { + static constexpr unsigned offset = 300; +}; + +template +struct AnnIvfSqInputs { + IdxT num_queries; + IdxT num_db_vecs; + IdxT dim; + IdxT k; + IdxT nprobe; + IdxT nlist; + cuvs::distance::DistanceType metric; + bool adaptive_centers; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const AnnIvfSqInputs& p) +{ + os << "{ " << p.num_queries << ", " << p.num_db_vecs << ", " << p.dim << ", " << p.k << ", " + << p.nprobe << ", " << p.nlist << ", " + << cuvs::neighbors::print_metric{static_cast((int)p.metric)} + << ", " << p.adaptive_centers << '}' << std::endl; + return os; +} + +template +class AnnIVFSQTest : public ::testing::TestWithParam> { + public: + AnnIVFSQTest() + : stream_(raft::resource::get_cuda_stream(handle_)), + ps(::testing::TestWithParam>::GetParam()), + database(0, stream_), + search_queries(0, stream_) + { + } + + void testIVFSQ() + { + size_t queries_size = ps.num_queries * ps.k; + std::vector indices_ivfsq(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_ivfsq(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + cuvs::neighbors::naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database.data(), + ps.num_queries, + ps.num_db_vecs, + ps.dim, + ps.k, + ps.metric); + raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + + { + double min_recall = + std::min(1.0, static_cast(ps.nprobe) / static_cast(ps.nlist)); + + rmm::device_uvector distances_ivfsq_dev(queries_size, stream_); + rmm::device_uvector indices_ivfsq_dev(queries_size, stream_); + + { + cuvs::neighbors::ivf_sq::index_params index_params; + cuvs::neighbors::ivf_sq::search_params search_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + search_params.n_probes = ps.nprobe; + + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 0.5; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.num_db_vecs, ps.dim); + + auto idx = cuvs::neighbors::ivf_sq::build(handle_, index_params, database_view); + + // Test extend: build without data, then extend + cuvs::neighbors::ivf_sq::index_params index_params_no_add; + index_params_no_add.n_lists = ps.nlist; + index_params_no_add.metric = ps.metric; + index_params_no_add.adaptive_centers = ps.adaptive_centers; + index_params_no_add.add_data_on_build = false; + index_params_no_add.kmeans_trainset_fraction = 0.5; + + auto idx_empty = + cuvs::neighbors::ivf_sq::build(handle_, index_params_no_add, database_view); + + auto vector_indices = raft::make_device_vector(handle_, ps.num_db_vecs); + raft::linalg::map_offset(handle_, vector_indices.view(), raft::identity_op{}); + raft::resource::sync_stream(handle_); + + auto indices_view = raft::make_device_vector_view( + vector_indices.data_handle(), ps.num_db_vecs); + cuvs::neighbors::ivf_sq::extend( + handle_, + database_view, + std::make_optional>(indices_view), + &idx_empty); + + // Serialize / deserialize round-trip + tmp_index_file index_file; + cuvs::neighbors::ivf_sq::serialize(handle_, index_file.filename, idx); + cuvs::neighbors::ivf_sq::index index_loaded(handle_); + cuvs::neighbors::ivf_sq::deserialize(handle_, index_file.filename, &index_loaded); + ASSERT_EQ(idx.size(), index_loaded.size()); + ASSERT_EQ(idx.dim(), index_loaded.dim()); + ASSERT_EQ(idx.n_lists(), index_loaded.n_lists()); + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.num_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_ivfsq_dev.data(), ps.num_queries, ps.k); + auto dists_out_view = + raft::make_device_matrix_view(distances_ivfsq_dev.data(), ps.num_queries, ps.k); + + cuvs::neighbors::ivf_sq::search(handle_, + search_params, + index_loaded, + search_queries_view, + indices_out_view, + dists_out_view); + + raft::update_host( + distances_ivfsq.data(), distances_ivfsq_dev.data(), queries_size, stream_); + raft::update_host(indices_ivfsq.data(), indices_ivfsq_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + // SQ introduces quantization error, so we relax the distance epsilon + float eps = 0.1; + ASSERT_TRUE(eval_neighbours(indices_naive, + indices_ivfsq, + distances_naive, + distances_ivfsq, + ps.num_queries, + ps.k, + eps, + min_recall)); + } + } + + void testFilter() + { + if (ps.num_db_vecs <= static_cast(test_ivf_sample_filter::offset)) { + GTEST_SKIP() << "Skipping filter test: num_db_vecs <= filter offset"; + } + + size_t queries_size = ps.num_queries * ps.k; + std::vector indices_ivfsq(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_ivfsq(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + auto* database_filtered_ptr = database.data() + test_ivf_sample_filter::offset * ps.dim; + cuvs::neighbors::naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database_filtered_ptr, + ps.num_queries, + ps.num_db_vecs - test_ivf_sample_filter::offset, + ps.dim, + ps.k, + ps.metric); + raft::linalg::addScalar(indices_naive_dev.data(), + indices_naive_dev.data(), + IdxT(test_ivf_sample_filter::offset), + queries_size, + stream_); + raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + + { + double min_recall = + std::min(1.0, static_cast(ps.nprobe) / static_cast(ps.nlist)); + + rmm::device_uvector distances_ivfsq_dev(queries_size, stream_); + rmm::device_uvector indices_ivfsq_dev(queries_size, stream_); + + { + cuvs::neighbors::ivf_sq::index_params index_params; + cuvs::neighbors::ivf_sq::search_params search_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + search_params.n_probes = ps.nprobe; + + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 0.5; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::ivf_sq::build(handle_, index_params, database_view); + + auto removed_indices = + raft::make_device_vector(handle_, test_ivf_sample_filter::offset); + raft::linalg::map_offset(handle_, removed_indices.view(), raft::identity_op{}); + raft::resource::sync_stream(handle_); + + cuvs::core::bitset removed_indices_bitset( + handle_, removed_indices.view(), ps.num_db_vecs); + auto bitset_filter_obj = + cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset.view()); + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.num_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_ivfsq_dev.data(), ps.num_queries, ps.k); + auto dists_out_view = + raft::make_device_matrix_view(distances_ivfsq_dev.data(), ps.num_queries, ps.k); + + cuvs::neighbors::ivf_sq::search(handle_, + search_params, + index, + search_queries_view, + indices_out_view, + dists_out_view, + bitset_filter_obj); + + raft::update_host( + distances_ivfsq.data(), distances_ivfsq_dev.data(), queries_size, stream_); + raft::update_host(indices_ivfsq.data(), indices_ivfsq_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + float eps = 0.1; + ASSERT_TRUE(eval_neighbours(indices_naive, + indices_ivfsq, + distances_naive, + distances_ivfsq, + ps.num_queries, + ps.k, + eps, + min_recall)); + } + } + + void SetUp() override + { + database.resize(ps.num_db_vecs * ps.dim, stream_); + search_queries.resize(ps.num_queries * ps.dim, stream_); + + raft::random::RngState r(1234ULL); + if constexpr (std::is_same_v || std::is_same_v) { + raft::random::uniform( + handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(0.1), DataT(2.0)); + raft::random::uniform( + handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(0.1), DataT(2.0)); + } + raft::resource::sync_stream(handle_); + } + + void TearDown() override + { + raft::resource::sync_stream(handle_); + database.resize(0, stream_); + search_queries.resize(0, stream_); + } + + private: + raft::resources handle_; + rmm::cuda_stream_view stream_; + AnnIvfSqInputs ps; + rmm::device_uvector database; + rmm::device_uvector search_queries; +}; + +const std::vector> inputs = { + // num_queries, num_db_vecs, dim, k, nprobe, nlist, metric, adaptive_centers + + // ===== Dimension edge cases (all four metrics) ===== + // dim=1 (CosineExpanded excluded: requires dim > 1) + {1000, 10000, 1, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 1, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 1, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + // dim=2,3,4,5 (unaligned) + {1000, 10000, 2, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 2, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 3, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 3, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {1000, 10000, 4, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 4, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 5, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 5, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // dim=7,8 (around veclen=16 boundary, not a multiple of veclen) + {1000, 10000, 7, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 7, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + // dim=15,16,17 (around veclen=16 boundary) + {1000, 10000, 15, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 15, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + {1000, 10000, 17, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 17, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // dim=31,32,33 (around 2*veclen boundary) + {1000, 10000, 31, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 31, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 32, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 32, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 32, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 33, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 33, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + // medium dims + {1000, 10000, 64, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 64, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + {1000, 10000, 256, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 256, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + // large dims (may exceed shared memory limits) + {1000, 10000, 2048, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 2048, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 2049, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 2049, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 2050, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 2050, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + + // ===== k edge cases ===== + {1000, 10000, 16, 1, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 1, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 1, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 2, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 5, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 20, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 20, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 50, 100, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 100, 200, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 100, 200, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + + // ===== nprobe / nlist edge cases ===== + // nprobe == nlist (exhaustive probe) + {1000, 10000, 16, 10, 64, 64, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 64, 64, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 10, 64, 64, cuvs::distance::DistanceType::CosineExpanded, false}, + // nprobe == 1 (minimal probe) + {1000, 10000, 16, 10, 1, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 1, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // nprobe > nlist (clamped to nlist) + {1000, 10000, 16, 10, 2048, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 2048, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // various nprobe + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + // very small nlist + {100, 10000, 16, 10, 8, 8, cuvs::distance::DistanceType::L2Expanded, false}, + {100, 10000, 16, 10, 8, 8, cuvs::distance::DistanceType::CosineExpanded, false}, + // smaller nlist + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::L2Expanded, false}, + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::InnerProduct, false}, + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::CosineExpanded, false}, + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + + // ===== Dataset size edge cases ===== + // single query + {1, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // very few queries + {2, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {5, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // very few db vectors (nlist reduced to fit) + {100, 500, 16, 10, 40, 256, cuvs::distance::DistanceType::L2Expanded, false}, + {100, 500, 16, 10, 40, 256, cuvs::distance::DistanceType::CosineExpanded, false}, + // small db with many empty clusters + {100, 100, 16, 5, 20, 64, cuvs::distance::DistanceType::L2Expanded, false}, + {100, 100, 16, 5, 20, 64, cuvs::distance::DistanceType::CosineExpanded, false}, + // larger datasets + {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {10000, 131072, 8, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {10000, 131072, 8, 10, 50, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + + // ===== Large query batches (gridDim.x > 65535) ===== + {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::L2Expanded, false}, + {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::InnerProduct, false}, + {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::CosineExpanded, false}, + {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + {100000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::L2Expanded, false}, + {100000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::CosineExpanded, false}, + // just above the old 65535 limit + {65536, 1024, 16, 10, 32, 64, cuvs::distance::DistanceType::L2Expanded, false}, + {65536, 1024, 16, 10, 32, 64, cuvs::distance::DistanceType::CosineExpanded, false}, + + // ===== Adaptive centers (all four metrics, multiple dims) ===== + {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, true}, + {1000, 10000, 32, 10, 50, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 32, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 32, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + + // ===== Recall-stability: same data, different query counts ===== + {20000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::L2Expanded, false}, + {50000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::L2Expanded, false}, +}; + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/tests/neighbors/ann_ivf_sq/test_float_uint8_t.cu b/cpp/tests/neighbors/ann_ivf_sq/test_float_uint8_t.cu new file mode 100644 index 0000000000..02ec8a7dfc --- /dev/null +++ b/cpp/tests/neighbors/ann_ivf_sq/test_float_uint8_t.cu @@ -0,0 +1,21 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "../ann_ivf_sq.cuh" + +namespace cuvs::neighbors::ivf_sq { + +typedef AnnIVFSQTest AnnIVFSQTestF_float; +TEST_P(AnnIVFSQTestF_float, AnnIVFSQ) +{ + this->testIVFSQ(); + this->testFilter(); +} + +INSTANTIATE_TEST_CASE_P(AnnIVFSQTest, AnnIVFSQTestF_float, ::testing::ValuesIn(inputs)); + +} // namespace cuvs::neighbors::ivf_sq diff --git a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml index fa2195fc61..3a787f65ab 100644 --- a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml @@ -34,6 +34,9 @@ cuvs_ivf_flat: cuvs_ivf_pq: executable: CUVS_IVF_PQ_ANN_BENCH requires_gpu: true +cuvs_ivf_sq: + executable: CUVS_IVF_SQ_ANN_BENCH + requires_gpu: true cuvs_cagra: executable: CUVS_CAGRA_ANN_BENCH requires_gpu: true diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml new file mode 100644 index 0000000000..711f3e8ce8 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml @@ -0,0 +1,16 @@ +name: cuvs_ivf_sq +groups: + base: + build: + nlist: [1024, 2048, 4096, 8192, 16384, 32000, 64000] + ratio: [1, 2, 4] + niter: [20, 25] + search: + nprobe: [1, 5, 10, 50, 100, 200, 500, 1000, 2000] + test: + build: + nlist: [1024] + ratio: [1] + niter: [20] + search: + nprobe: [1, 5] From cf19a8629c3377426a2a6bfa3b3e7d900044b42a Mon Sep 17 00:00:00 2001 From: vic Date: Mon, 2 Mar 2026 11:25:03 +0100 Subject: [PATCH 2/5] add IVF-SQ bench constraints --- .../cuvs_bench/config/algos/constraints/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py b/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py index 9111bdc3b9..ea2afe351e 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py +++ b/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 @@ -50,6 +50,12 @@ def cuvs_cagra_search(params, build_params, k, batch_size): return True +def cuvs_ivf_sq_search(params, build_params, k, batch_size): + if "nlist" in build_params and "nprobe" in params: + return build_params["nlist"] >= params["nprobe"] + return True + + ############################################################################### # FAISS constraints # ############################################################################### From 6a95e8a8215016a01127d9c62e97cbb2fffa1cac Mon Sep 17 00:00:00 2001 From: vic Date: Mon, 2 Mar 2026 12:21:49 +0100 Subject: [PATCH 3/5] Update default IVF-SQ benchmark config --- .../cuvs_bench/config/algos/cuvs_ivf_sq.yaml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml index 711f3e8ce8..adaad54e04 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml @@ -1,12 +1,21 @@ name: cuvs_ivf_sq +constraints: + search: cuvs_bench.config.algos.constraints.cuvs_ivf_sq_search groups: base: build: - nlist: [1024, 2048, 4096, 8192, 16384, 32000, 64000] - ratio: [1, 2, 4] - niter: [20, 25] + nlist: [1024, 2048, 4096, 8192] + ratio: [1, 2] + niter: [25] search: - nprobe: [1, 5, 10, 50, 100, 200, 500, 1000, 2000] + nprobe: [1, 5, 10, 20, 50, 100, 200, 500] + large: + build: + nlist: [8192, 16384, 32000, 64000] + ratio: [2, 4] + niter: [20] + search: + nprobe: [10, 20, 50, 100, 200, 500, 1000, 2000] test: build: nlist: [1024] From b652160cd37cd03218dfc90a0f9266f4624ca076 Mon Sep 17 00:00:00 2001 From: vic Date: Wed, 11 Mar 2026 16:32:42 +0100 Subject: [PATCH 4/5] IVF-SQ C API --- c/CMakeLists.txt | 1 + c/include/cuvs/core/all.h | 1 + c/include/cuvs/neighbors/ivf_sq.h | 373 +++++++++++++++++++++ c/src/neighbors/ivf_sq.cpp | 363 ++++++++++++++++++++ c/src/neighbors/ivf_sq.hpp | 14 + c/tests/CMakeLists.txt | 3 +- c/tests/neighbors/ann_ivf_sq_c.cu | 130 +++++++ c/tests/neighbors/c_api.c | 13 +- c/tests/neighbors/run_ivf_sq_c.c | 86 +++++ cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh | 2 +- 10 files changed, 983 insertions(+), 3 deletions(-) create mode 100644 c/include/cuvs/neighbors/ivf_sq.h create mode 100644 c/src/neighbors/ivf_sq.cpp create mode 100644 c/src/neighbors/ivf_sq.hpp create mode 100644 c/tests/neighbors/ann_ivf_sq_c.cu create mode 100644 c/tests/neighbors/run_ivf_sq_c.c diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 22a25c24d0..980cb78f29 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -89,6 +89,7 @@ add_library( src/neighbors/brute_force.cpp src/neighbors/ivf_flat.cpp src/neighbors/ivf_pq.cpp + src/neighbors/ivf_sq.cpp src/neighbors/cagra.cpp $<$:src/neighbors/hnsw.cpp> $<$:src/neighbors/mg_ivf_pq.cpp> diff --git a/c/include/cuvs/core/all.h b/c/include/cuvs/core/all.h index cc83684925..0847cc63da 100644 --- a/c/include/cuvs/core/all.h +++ b/c/include/cuvs/core/all.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/c/include/cuvs/neighbors/ivf_sq.h b/c/include/cuvs/neighbors/ivf_sq.h new file mode 100644 index 0000000000..6b312443b0 --- /dev/null +++ b/c/include/cuvs/neighbors/ivf_sq.h @@ -0,0 +1,373 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @defgroup ivf_sq_c_index_params IVF-SQ index build parameters + * @{ + */ +/** + * @brief Supplemental parameters to build IVF-SQ Index + * + */ +struct cuvsIvfSqIndexParams { + /** Distance type. */ + cuvsDistanceType metric; + /** The argument used by some distance metrics. */ + float metric_arg; + /** + * Whether to add the dataset content to the index, i.e.: + * + * - `true` means the index is filled with the dataset vectors and ready to search after calling + * `build`. + * - `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but + * the index is left empty; you'd need to call `extend` on the index afterwards to populate it. + */ + bool add_data_on_build; + /** The number of inverted lists (clusters) */ + uint32_t n_lists; + /** The number of iterations searching for kmeans centers (index building). */ + uint32_t kmeans_n_iters; + /** The fraction of data to use during iterative kmeans building. */ + double kmeans_trainset_fraction; + /** + * By default (adaptive_centers = false), the cluster centers are trained in `ivf_sq::build`, + * and never modified in `ivf_sq::extend`. As a result, you may need to retrain the index + * from scratch after invoking (`ivf_sq::extend`) a few times with new data, the distribution of + * which is no longer representative of the original training set. + * + * The alternative behavior (adaptive_centers = true) is to update the cluster centers for new + * data when it is added. In this case, `index.centers()` are always exactly the centroids of the + * data in the corresponding clusters. The drawback of this behavior is that the centroids depend + * on the order of adding new data (through the classification of the added data); that is, + * `index.centers()` "drift" together with the changing distribution of the newly added data. + */ + bool adaptive_centers; + /** + * By default, the algorithm allocates more space than necessary for individual clusters + * (`list_data`). This allows to amortize the cost of memory allocation and reduce the number of + * data copies during repeated calls to `extend` (extending the database). + * + * The alternative is the conservative allocation behavior; when enabled, the algorithm always + * allocates the minimum amount of memory required to store the given number of records. Set this + * flag to `true` if you prefer to use as little GPU memory for the database as possible. + */ + bool conservative_memory_allocation; +}; + +typedef struct cuvsIvfSqIndexParams* cuvsIvfSqIndexParams_t; + +/** + * @brief Allocate IVF-SQ Index params, and populate with default values + * + * @param[in] index_params cuvsIvfSqIndexParams_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* index_params); + +/** + * @brief De-allocate IVF-SQ Index params + * + * @param[in] index_params + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index_params); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_search_params IVF-SQ index search parameters + * @{ + */ +/** + * @brief Supplemental parameters to search IVF-SQ index + * + */ +struct cuvsIvfSqSearchParams { + /** The number of clusters to search. */ + uint32_t n_probes; +}; + +typedef struct cuvsIvfSqSearchParams* cuvsIvfSqSearchParams_t; + +/** + * @brief Allocate IVF-SQ search params, and populate with default values + * + * @param[in] params cuvsIvfSqSearchParams_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params); + +/** + * @brief De-allocate IVF-SQ search params + * + * @param[in] params + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index IVF-SQ index + * @{ + */ +/** + * @brief Struct to hold address of cuvs::neighbors::ivf_sq::index and its active trained dtype + * + */ +typedef struct { + uintptr_t addr; + DLDataType dtype; +} cuvsIvfSqIndex; + +typedef cuvsIvfSqIndex* cuvsIvfSqIndex_t; + +/** + * @brief Allocate IVF-SQ index + * + * @param[in] index cuvsIvfSqIndex_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index); + +/** + * @brief De-allocate IVF-SQ index + * + * @param[in] index cuvsIvfSqIndex_t to de-allocate + */ +cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index); + +/** Get the number of clusters/inverted lists */ +cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists); + +/** Get the dimensionality of the data */ +cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim); + +/** Get the size of the index */ +cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size); + +/** + * @brief Get the cluster centers corresponding to the lists [n_lists, dim] + * + * @param[in] index cuvsIvfSqIndex_t Built Ivf-SQ Index + * @param[out] centers Preallocated array on host or device memory to store output, [n_lists, dim] + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_build IVF-SQ index build + * @{ + */ +/** + * @brief Build an IVF-SQ index with a `DLManagedTensor` which has underlying + * `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, + * or `kDLCPU`. Also, acceptable underlying types are: + * 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16` + * + * @code {.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor dataset; + * + * // Create default index params + * cuvsIvfSqIndexParams_t index_params; + * cuvsError_t params_create_status = cuvsIvfSqIndexParamsCreate(&index_params); + * + * // Create IVF-SQ index + * cuvsIvfSqIndex_t index; + * cuvsError_t index_create_status = cuvsIvfSqIndexCreate(&index); + * + * // Build the IVF-SQ Index + * cuvsError_t build_status = cuvsIvfSqBuild(res, index_params, &dataset, index); + * + * // de-allocate `index_params`, `index` and `res` + * cuvsError_t params_destroy_status = cuvsIvfSqIndexParamsDestroy(index_params); + * cuvsError_t index_destroy_status = cuvsIvfSqIndexDestroy(index); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] index_params cuvsIvfSqIndexParams_t used to build IVF-SQ index + * @param[in] dataset DLManagedTensor* training dataset + * @param[out] index cuvsIvfSqIndex_t Newly built IVF-SQ index + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqBuild(cuvsResources_t res, + cuvsIvfSqIndexParams_t index_params, + DLManagedTensor* dataset, + cuvsIvfSqIndex_t index); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_search IVF-SQ index search + * @{ + */ +/** + * @brief Search an IVF-SQ index with a `DLManagedTensor` which has underlying + * `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. + * Types for input are: + * 1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or 16 + * 2. `neighbors`: `kDLDataType.code == kDLInt` and `kDLDataType.bits = 64` + * 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * + * @code {.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor queries; + * DLManagedTensor neighbors; + * DLManagedTensor distances; + * + * // Create default search params + * cuvsIvfSqSearchParams_t search_params; + * cuvsError_t params_create_status = cuvsIvfSqSearchParamsCreate(&search_params); + * + * // Search the `index` built using `cuvsIvfSqBuild` + * cuvsError_t search_status = cuvsIvfSqSearch(res, search_params, index, &queries, &neighbors, + * &distances); + * + * // de-allocate `search_params` and `res` + * cuvsError_t params_destroy_status = cuvsIvfSqSearchParamsDestroy(search_params); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] search_params cuvsIvfSqSearchParams_t used to search IVF-SQ index + * @param[in] index ivfSqIndex which has been returned by `cuvsIvfSqBuild` + * @param[in] queries DLManagedTensor* queries dataset to search + * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries + * @param[out] distances DLManagedTensor* output `k` distances for queries + */ +cuvsError_t cuvsIvfSqSearch(cuvsResources_t res, + cuvsIvfSqSearchParams_t search_params, + cuvsIvfSqIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances); + +/** + * @brief Search an IVF-SQ index with filtering. + * + * Same as cuvsIvfSqSearch, but applies a pre-filter to exclude vectors during search. + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] search_params cuvsIvfSqSearchParams_t used to search IVF-SQ index + * @param[in] index ivfSqIndex which has been returned by `cuvsIvfSqBuild` + * @param[in] queries DLManagedTensor* queries dataset to search + * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries + * @param[out] distances DLManagedTensor* output `k` distances for queries + * @param[in] filter cuvsFilter to filter neighbors based on the given bitset + */ +cuvsError_t cuvsIvfSqSearchWithFilter(cuvsResources_t res, + cuvsIvfSqSearchParams_t search_params, + cuvsIvfSqIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances, + cuvsFilter filter); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_serialize IVF-SQ C-API serialize functions + * @{ + */ +/** + * Save the index to file. + * + * Experimental, both the API and the serialization format are subject to change. + * + * @code{.c} + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsIvfSqBuild` + * cuvsIvfSqSerialize(res, "/path/to/index", index); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the file name for saving the index + * @param[in] index IVF-SQ index + */ +cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex_t index); + +/** + * Load index from file. + * + * Experimental, both the API and the serialization format are subject to change. + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the name of the file that stores the index + * @param[out] index IVF-SQ index loaded from disk + */ +cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res, + const char* filename, + cuvsIvfSqIndex_t index); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_extend IVF-SQ index extend + * @{ + */ +/** + * @brief Extend the index with the new data. + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] new_vectors DLManagedTensor* the new vectors to add to the index + * @param[in] new_indices DLManagedTensor* vector of new indices for the new vectors + * @param[inout] index IVF-SQ index to be extended + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqExtend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex_t index); +/** + * @} + */ +#ifdef __cplusplus +} +#endif diff --git a/c/src/neighbors/ivf_sq.cpp b/c/src/neighbors/ivf_sq.cpp new file mode 100644 index 0000000000..eadf84a299 --- /dev/null +++ b/c/src/neighbors/ivf_sq.cpp @@ -0,0 +1,363 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "../core/exceptions.hpp" +#include "../core/interop.hpp" + +namespace cuvs::neighbors::ivf_sq { +void convert_c_index_params(cuvsIvfSqIndexParams params, + cuvs::neighbors::ivf_sq::index_params* out) +{ + out->metric = static_cast((int)params.metric); + out->metric_arg = params.metric_arg; + out->add_data_on_build = params.add_data_on_build; + out->n_lists = params.n_lists; + out->kmeans_n_iters = params.kmeans_n_iters; + out->kmeans_trainset_fraction = params.kmeans_trainset_fraction; + out->adaptive_centers = params.adaptive_centers; + out->conservative_memory_allocation = params.conservative_memory_allocation; +} +void convert_c_search_params(cuvsIvfSqSearchParams params, + cuvs::neighbors::ivf_sq::search_params* out) +{ + out->n_probes = params.n_probes; +} +} // namespace cuvs::neighbors::ivf_sq + +namespace { + +template +void* _build(cuvsResources_t res, cuvsIvfSqIndexParams params, DLManagedTensor* dataset_tensor) +{ + auto res_ptr = reinterpret_cast(res); + + auto build_params = cuvs::neighbors::ivf_sq::index_params(); + cuvs::neighbors::ivf_sq::convert_c_index_params(params, &build_params); + + auto dataset = dataset_tensor->dl_tensor; + auto dim = dataset.shape[1]; + + auto index = new cuvs::neighbors::ivf_sq::index(*res_ptr, build_params, dim); + + if (cuvs::core::is_dlpack_device_compatible(dataset)) { + using mdspan_type = raft::device_matrix_view; + auto mds = cuvs::core::from_dlpack(dataset_tensor); + cuvs::neighbors::ivf_sq::build(*res_ptr, build_params, mds, *index); + } else { + using mdspan_type = raft::host_matrix_view; + auto mds = cuvs::core::from_dlpack(dataset_tensor); + cuvs::neighbors::ivf_sq::build(*res_ptr, build_params, mds, *index); + } + + return index; +} + +template +void _search(cuvsResources_t res, + cuvsIvfSqSearchParams params, + cuvsIvfSqIndex index, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor, + cuvsFilter* filter) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + + auto search_params = cuvs::neighbors::ivf_sq::search_params(); + cuvs::neighbors::ivf_sq::convert_c_search_params(params, &search_params); + + using queries_mdspan_type = raft::device_matrix_view; + using neighbors_mdspan_type = raft::device_matrix_view; + using distances_mdspan_type = raft::device_matrix_view; + auto queries_mds = cuvs::core::from_dlpack(queries_tensor); + auto neighbors_mds = cuvs::core::from_dlpack(neighbors_tensor); + auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + + if (filter == nullptr || filter->type == NO_FILTER) { + cuvs::neighbors::ivf_sq::search( + *res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds); + } else if (filter->type == BITSET) { + using filter_mdspan_type = raft::device_vector_view; + auto removed_indices_tensor = reinterpret_cast(filter->addr); + auto removed_indices = cuvs::core::from_dlpack(removed_indices_tensor); + cuvs::core::bitset_view removed_indices_bitset(removed_indices, + index_ptr->size()); + auto bitset_filter_obj = cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset); + cuvs::neighbors::ivf_sq::search(*res_ptr, + search_params, + *index_ptr, + queries_mds, + neighbors_mds, + distances_mds, + bitset_filter_obj); + } else { + RAFT_FAIL("Unsupported filter type: BITMAP"); + } +} + +void _serialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex index) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + cuvs::neighbors::ivf_sq::serialize(*res_ptr, std::string(filename), *index_ptr); +} + +void* _deserialize(cuvsResources_t res, const char* filename) +{ + auto res_ptr = reinterpret_cast(res); + auto index = new cuvs::neighbors::ivf_sq::index(*res_ptr); + cuvs::neighbors::ivf_sq::deserialize(*res_ptr, std::string(filename), index); + return index; +} + +template +void _extend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex index) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + + bool on_device = cuvs::core::is_dlpack_device_compatible(new_vectors->dl_tensor); + if (on_device != cuvs::core::is_dlpack_device_compatible(new_indices->dl_tensor)) { + RAFT_FAIL("extend inputs must both either be on device memory or host memory"); + } + + if (on_device) { + using vectors_mdspan_type = raft::device_matrix_view; + using indices_mdspan_type = raft::device_vector_view; + auto vectors_mds = cuvs::core::from_dlpack(new_vectors); + auto indices_mds = cuvs::core::from_dlpack(new_indices); + cuvs::neighbors::ivf_sq::extend(*res_ptr, vectors_mds, indices_mds, index_ptr); + } else { + using vectors_mdspan_type = raft::host_matrix_view; + using indices_mdspan_type = raft::host_vector_view; + auto vectors_mds = cuvs::core::from_dlpack(new_vectors); + auto indices_mds = cuvs::core::from_dlpack(new_indices); + cuvs::neighbors::ivf_sq::extend(*res_ptr, vectors_mds, indices_mds, index_ptr); + } +} + +void _get_centers(cuvsIvfSqIndex index, DLManagedTensor* centers) +{ + auto index_ptr = reinterpret_cast*>(index.addr); + cuvs::core::to_dlpack(index_ptr->centers(), centers); +} +} // namespace + +extern "C" cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index) +{ + return cuvs::core::translate_exceptions([=] { *index = new cuvsIvfSqIndex{}; }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index_c_ptr) +{ + return cuvs::core::translate_exceptions([=] { + auto index = *index_c_ptr; + auto index_ptr = reinterpret_cast*>(index.addr); + delete index_ptr; + delete index_c_ptr; + }); +} + +extern "C" cuvsError_t cuvsIvfSqBuild(cuvsResources_t res, + cuvsIvfSqIndexParams_t params, + DLManagedTensor* dataset_tensor, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + auto dataset = dataset_tensor->dl_tensor; + + index->dtype.code = dataset.dtype.code; + index->dtype.bits = dataset.dtype.bits; + + if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { + index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); + } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 16) { + index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); + } else { + RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", + dataset.dtype.code, + dataset.dtype.bits); + } + }); +} + +static cuvsError_t _cuvsIvfSqSearchImpl(cuvsResources_t res, + cuvsIvfSqSearchParams_t params, + cuvsIvfSqIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor, + cuvsFilter* filter) +{ + return cuvs::core::translate_exceptions([=] { + auto queries = queries_tensor->dl_tensor; + auto neighbors = neighbors_tensor->dl_tensor; + auto distances = distances_tensor->dl_tensor; + + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(queries), + "queries should have device compatible memory"); + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(neighbors), + "neighbors should have device compatible memory"); + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(distances), + "distances should have device compatible memory"); + + RAFT_EXPECTS(neighbors.dtype.code == kDLInt && neighbors.dtype.bits == 64, + "neighbors should be of type int64_t"); + RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32, + "distances should be of type float32"); + + auto index = *index_c_ptr; + if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) { + _search( + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter); + } else if (queries.dtype.code == kDLFloat && queries.dtype.bits == 16) { + _search( + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter); + } else { + RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d", + queries.dtype.code, + queries.dtype.bits); + } + }); +} + +extern "C" cuvsError_t cuvsIvfSqSearch(cuvsResources_t res, + cuvsIvfSqSearchParams_t params, + cuvsIvfSqIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor) +{ + return _cuvsIvfSqSearchImpl( + res, params, index_c_ptr, queries_tensor, neighbors_tensor, distances_tensor, nullptr); +} + +extern "C" cuvsError_t cuvsIvfSqSearchWithFilter(cuvsResources_t res, + cuvsIvfSqSearchParams_t params, + cuvsIvfSqIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor, + cuvsFilter filter) +{ + return _cuvsIvfSqSearchImpl( + res, params, index_c_ptr, queries_tensor, neighbors_tensor, distances_tensor, &filter); +} + +extern "C" cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* params) +{ + return cuvs::core::translate_exceptions([=] { + *params = new cuvsIvfSqIndexParams{.metric = L2Expanded, + .metric_arg = 2.0f, + .add_data_on_build = true, + .n_lists = 1024, + .kmeans_n_iters = 20, + .kmeans_trainset_fraction = 0.5, + .adaptive_centers = false, + .conservative_memory_allocation = false}; + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t params) +{ + return cuvs::core::translate_exceptions([=] { delete params; }); +} + +extern "C" cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params) +{ + return cuvs::core::translate_exceptions( + [=] { *params = new cuvsIvfSqSearchParams{.n_probes = 20}; }); +} + +extern "C" cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params) +{ + return cuvs::core::translate_exceptions([=] { delete params; }); +} + +extern "C" cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res, + const char* filename, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions( + [=] { index->addr = reinterpret_cast(_deserialize(res, filename)); }); +} + +extern "C" cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, + const char* filename, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { _serialize(res, filename, *index); }); +} + +extern "C" cuvsError_t cuvsIvfSqExtend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + auto vectors = new_vectors->dl_tensor; + + if (index->dtype.code == 0 && index->dtype.bits == 0) { + index->dtype.code = vectors.dtype.code; + index->dtype.bits = vectors.dtype.bits; + } + + if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 32) { + _extend(res, new_vectors, new_indices, *index); + } else if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 16) { + _extend(res, new_vectors, new_indices, *index); + } else { + RAFT_FAIL( + "Unsupported vectors DLtensor dtype: %d and bits: %d", vectors.dtype.code, vectors.dtype.bits); + } + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists) +{ + return cuvs::core::translate_exceptions([=] { + auto index_ptr = + reinterpret_cast*>(index->addr); + *n_lists = index_ptr->n_lists(); + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim) +{ + return cuvs::core::translate_exceptions([=] { + auto index_ptr = + reinterpret_cast*>(index->addr); + *dim = index_ptr->dim(); + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size) +{ + return cuvs::core::translate_exceptions([=] { + auto index_ptr = + reinterpret_cast*>(index->addr); + *size = index_ptr->size(); + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers) +{ + return cuvs::core::translate_exceptions([=] { _get_centers(*index, centers); }); +} diff --git a/c/src/neighbors/ivf_sq.hpp b/c/src/neighbors/ivf_sq.hpp new file mode 100644 index 0000000000..3a08bc689a --- /dev/null +++ b/c/src/neighbors/ivf_sq.hpp @@ -0,0 +1,14 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include + +namespace cuvs::neighbors::ivf_sq { +/// Converts a cuvsIvfSqIndexParams struct (c) to a ivf_sq::index_params (C++) struct +void convert_c_index_params(cuvsIvfSqIndexParams params, + cuvs::neighbors::ivf_sq::index_params* out); +void convert_c_search_params(cuvsIvfSqSearchParams params, + cuvs::neighbors::ivf_sq::search_params* out); +} // namespace cuvs::neighbors::ivf_sq diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index 6d52e5b174..9c96fc4120 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -77,6 +77,7 @@ ConfigureTest( ConfigureTest(NAME BRUTEFORCE_C_TEST PATH neighbors/run_brute_force_c.c neighbors/brute_force_c.cu) ConfigureTest(NAME IVF_FLAT_C_TEST PATH neighbors/run_ivf_flat_c.c neighbors/ann_ivf_flat_c.cu) ConfigureTest(NAME IVF_PQ_C_TEST PATH neighbors/run_ivf_pq_c.c neighbors/ann_ivf_pq_c.cu) +ConfigureTest(NAME IVF_SQ_C_TEST PATH neighbors/run_ivf_sq_c.c neighbors/ann_ivf_sq_c.cu) ConfigureTest(NAME CAGRA_C_TEST PATH neighbors/ann_cagra_c.cu) ConfigureTest(NAME MG_C_TEST PATH neighbors/run_mg_c.c neighbors/ann_mg_c.cu) ConfigureTest( diff --git a/c/tests/neighbors/ann_ivf_sq_c.cu b/c/tests/neighbors/ann_ivf_sq_c.cu new file mode 100644 index 0000000000..c36786e45a --- /dev/null +++ b/c/tests/neighbors/ann_ivf_sq_c.cu @@ -0,0 +1,130 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include + +#include "neighbors/ann_utils.cuh" +#include + +extern "C" void run_ivf_sq(int64_t n_rows, + int64_t n_queries, + int64_t n_dim, + uint32_t n_neighbors, + float* index_data, + float* query_data, + float* distances_data, + int64_t* neighbors_data, + cuvsDistanceType metric, + size_t n_probes, + size_t n_lists); + +template +void generate_random_data(T* devPtr, size_t size) +{ + raft::handle_t handle; + raft::random::RngState r(1234ULL); + raft::random::uniform(handle, r, devPtr, size, T(0.1), T(2.0)); +}; + +template +void recall_eval(T* query_data, + T* index_data, + IdxT* neighbors, + T* distances, + size_t n_queries, + size_t n_rows, + size_t n_dim, + size_t n_neighbors, + cuvsDistanceType metric, + size_t n_probes, + size_t n_lists) +{ + raft::handle_t handle; + auto distances_ref = raft::make_device_matrix(handle, n_queries, n_neighbors); + auto neighbors_ref = raft::make_device_matrix(handle, n_queries, n_neighbors); + cuvs::neighbors::naive_knn( + handle, + distances_ref.data_handle(), + neighbors_ref.data_handle(), + query_data, + index_data, + n_queries, + n_rows, + n_dim, + n_neighbors, + static_cast((uint16_t)metric)); + + size_t size = n_queries * n_neighbors; + std::vector neighbors_h(size); + std::vector distances_h(size); + std::vector neighbors_ref_h(size); + std::vector distances_ref_h(size); + + auto stream = raft::resource::get_cuda_stream(handle); + raft::copy(neighbors_h.data(), neighbors, size, stream); + raft::copy(distances_h.data(), distances, size, stream); + raft::copy(neighbors_ref_h.data(), neighbors_ref.data_handle(), size, stream); + raft::copy(distances_ref_h.data(), distances_ref.data_handle(), size, stream); + + double min_recall = static_cast(n_probes) / static_cast(n_lists); + ASSERT_TRUE(cuvs::neighbors::eval_neighbours(neighbors_ref_h, + neighbors_h, + distances_ref_h, + distances_h, + n_queries, + n_neighbors, + 0.001, + min_recall)); +}; + +TEST(IvfSqC, BuildSearch) +{ + int64_t n_rows = 8096; + int64_t n_queries = 128; + int64_t n_dim = 32; + uint32_t n_neighbors = 8; + + raft::handle_t handle; + auto stream = raft::resource::get_cuda_stream(handle); + + cuvsDistanceType metric = L2Expanded; + size_t n_probes = 20; + size_t n_lists = 1024; + + rmm::device_uvector index_data(n_rows * n_dim, stream); + rmm::device_uvector query_data(n_queries * n_dim, stream); + rmm::device_uvector neighbors_data(n_queries * n_neighbors, stream); + rmm::device_uvector distances_data(n_queries * n_neighbors, stream); + + generate_random_data(index_data.data(), n_rows * n_dim); + generate_random_data(query_data.data(), n_queries * n_dim); + + run_ivf_sq(n_rows, + n_queries, + n_dim, + n_neighbors, + index_data.data(), + query_data.data(), + distances_data.data(), + neighbors_data.data(), + metric, + n_probes, + n_lists); + + recall_eval(query_data.data(), + index_data.data(), + neighbors_data.data(), + distances_data.data(), + n_queries, + n_rows, + n_dim, + n_neighbors, + metric, + n_probes, + n_lists); +} diff --git a/c/tests/neighbors/c_api.c b/c/tests/neighbors/c_api.c index 6988aaf618..86108ea703 100644 --- a/c/tests/neighbors/c_api.c +++ b/c/tests/neighbors/c_api.c @@ -1,11 +1,12 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #include #include #include +#include #include #include @@ -47,6 +48,15 @@ void test_compile_tiered_index() cuvsTieredIndexExtend(resources, &dataset, tiered_index); } +void test_compile_ivf_sq() +{ + assert(!"test_compile_ivf_sq is not meant to be run"); + + cuvsIvfSqIndex_t index; + cuvsIvfSqIndexCreate(&index); + cuvsIvfSqIndexDestroy(index); +} + void test_compile_all_neighbors() { // Smoke test to ensure that the all_neighbors.h API compiles correctly @@ -66,6 +76,7 @@ int main() // These are smoke tests that check that the C-APIs compile with a C compiler. // These are not meant to be run. test_compile_cagra(); + test_compile_ivf_sq(); test_compile_tiered_index(); test_compile_all_neighbors(); diff --git a/c/tests/neighbors/run_ivf_sq_c.c b/c/tests/neighbors/run_ivf_sq_c.c new file mode 100644 index 0000000000..d07502abd6 --- /dev/null +++ b/c/tests/neighbors/run_ivf_sq_c.c @@ -0,0 +1,86 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +void run_ivf_sq(int64_t n_rows, + int64_t n_queries, + int64_t n_dim, + uint32_t n_neighbors, + float* index_data, + float* query_data, + float* distances_data, + int64_t* neighbors_data, + cuvsDistanceType metric, + size_t n_probes, + size_t n_lists) +{ + cuvsResources_t res; + cuvsResourcesCreate(&res); + + DLManagedTensor dataset_tensor; + dataset_tensor.dl_tensor.data = index_data; + dataset_tensor.dl_tensor.device.device_type = kDLCUDA; + dataset_tensor.dl_tensor.ndim = 2; + dataset_tensor.dl_tensor.dtype.code = kDLFloat; + dataset_tensor.dl_tensor.dtype.bits = 32; + dataset_tensor.dl_tensor.dtype.lanes = 1; + int64_t dataset_shape[2] = {n_rows, n_dim}; + dataset_tensor.dl_tensor.shape = dataset_shape; + dataset_tensor.dl_tensor.strides = NULL; + + cuvsIvfSqIndex_t index; + cuvsIvfSqIndexCreate(&index); + + cuvsIvfSqIndexParams_t build_params; + cuvsIvfSqIndexParamsCreate(&build_params); + build_params->metric = metric; + build_params->n_lists = n_lists; + cuvsIvfSqBuild(res, build_params, &dataset_tensor, index); + + DLManagedTensor queries_tensor; + queries_tensor.dl_tensor.data = (void*)query_data; + queries_tensor.dl_tensor.device.device_type = kDLCUDA; + queries_tensor.dl_tensor.ndim = 2; + queries_tensor.dl_tensor.dtype.code = kDLFloat; + queries_tensor.dl_tensor.dtype.bits = 32; + queries_tensor.dl_tensor.dtype.lanes = 1; + int64_t queries_shape[2] = {n_queries, n_dim}; + queries_tensor.dl_tensor.shape = queries_shape; + queries_tensor.dl_tensor.strides = NULL; + + DLManagedTensor neighbors_tensor; + neighbors_tensor.dl_tensor.data = (void*)neighbors_data; + neighbors_tensor.dl_tensor.device.device_type = kDLCUDA; + neighbors_tensor.dl_tensor.ndim = 2; + neighbors_tensor.dl_tensor.dtype.code = kDLInt; + neighbors_tensor.dl_tensor.dtype.bits = 64; + neighbors_tensor.dl_tensor.dtype.lanes = 1; + int64_t neighbors_shape[2] = {n_queries, n_neighbors}; + neighbors_tensor.dl_tensor.shape = neighbors_shape; + neighbors_tensor.dl_tensor.strides = NULL; + + DLManagedTensor distances_tensor; + distances_tensor.dl_tensor.data = (void*)distances_data; + distances_tensor.dl_tensor.device.device_type = kDLCUDA; + distances_tensor.dl_tensor.ndim = 2; + distances_tensor.dl_tensor.dtype.code = kDLFloat; + distances_tensor.dl_tensor.dtype.bits = 32; + distances_tensor.dl_tensor.dtype.lanes = 1; + int64_t distances_shape[2] = {n_queries, n_neighbors}; + distances_tensor.dl_tensor.shape = distances_shape; + distances_tensor.dl_tensor.strides = NULL; + + cuvsIvfSqSearchParams_t search_params; + cuvsIvfSqSearchParamsCreate(&search_params); + search_params->n_probes = n_probes; + cuvsIvfSqSearch( + res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor); + + cuvsIvfSqSearchParamsDestroy(search_params); + cuvsIvfSqIndexParamsDestroy(build_params); + cuvsIvfSqIndexDestroy(index); + cuvsResourcesDestroy(res); +} diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh b/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh index 39c653b048..a17992ff19 100644 --- a/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh @@ -427,7 +427,7 @@ void search_impl(raft::resources const& handle, num_samples_view); ivf::detail::postprocess_distances( - distances, distances, index.metric(), n_queries, k, 1.0, false, stream); + handle, distances, distances, index.metric(), n_queries, k, 1.0, false); ivf::detail::postprocess_neighbors(neighbors, neighbors_uint32_ptr, From 928830a9a0677d4d0a3bc1283f5b0beb421cdbe0 Mon Sep 17 00:00:00 2001 From: vic Date: Thu, 12 Mar 2026 16:24:26 +0100 Subject: [PATCH 5/5] Add C documentation --- docs/source/c_api/neighbors.rst | 1 + docs/source/c_api/neighbors_ivf_sq_c.rst | 66 ++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 docs/source/c_api/neighbors_ivf_sq_c.rst diff --git a/docs/source/c_api/neighbors.rst b/docs/source/c_api/neighbors.rst index 09bd47e2c8..b950aa8227 100644 --- a/docs/source/c_api/neighbors.rst +++ b/docs/source/c_api/neighbors.rst @@ -12,6 +12,7 @@ Nearest Neighbors neighbors_bruteforce_c.rst neighbors_ivf_flat_c.rst neighbors_ivf_pq_c.rst + neighbors_ivf_sq_c.rst neighbors_cagra_c.rst neighbors_hnsw_c.rst neighbors_mg.rst diff --git a/docs/source/c_api/neighbors_ivf_sq_c.rst b/docs/source/c_api/neighbors_ivf_sq_c.rst new file mode 100644 index 0000000000..be903fcf97 --- /dev/null +++ b/docs/source/c_api/neighbors_ivf_sq_c.rst @@ -0,0 +1,66 @@ +IVF-SQ +====== + +The IVF-SQ method is an ANN algorithm. It uses an inverted file index (IVF) with scalar quantization (SQ) to compress the vectors. This algorithm provides knobs to reduce the overall search space and memory footprint, and to trade-off accuracy for speed. + +.. role:: py(code) + :language: c + :class: highlight + +``#include `` + +Index build parameters +---------------------- + +.. doxygengroup:: ivf_sq_c_index_params + :project: cuvs + :members: + :content-only: + +Index search parameters +----------------------- + +.. doxygengroup:: ivf_sq_c_search_params + :project: cuvs + :members: + :content-only: + +Index +----- + +.. doxygengroup:: ivf_sq_c_index + :project: cuvs + :members: + :content-only: + +Index build +----------- + +.. doxygengroup:: ivf_sq_c_index_build + :project: cuvs + :members: + :content-only: + +Index search +------------ + +.. doxygengroup:: ivf_sq_c_index_search + :project: cuvs + :members: + :content-only: + +Index extend +------------ + +.. doxygengroup:: ivf_sq_c_index_extend + :project: cuvs + :members: + :content-only: + +Index serialize +--------------- + +.. doxygengroup:: ivf_sq_c_index_serialize + :project: cuvs + :members: + :content-only: