Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,12 @@ if(NOT BUILD_CPU_ONLY)
src/neighbors/ivf_pq/detail/ivf_pq_transform_half_int64_t.cu
src/neighbors/ivf_pq/detail/ivf_pq_transform_int8_t_int64_t.cu
src/neighbors/ivf_pq/detail/ivf_pq_transform_uint8_t_int64_t.cu
src/neighbors/ivf_sq_index.cpp
src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu
src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu
src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu
src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu
src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu
src/neighbors/knn_merge_parts.cu
src/neighbors/nn_descent.cu
src/neighbors/nn_descent_float.cu
Expand Down
9 changes: 9 additions & 0 deletions cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algori
option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_FAISS_CPU_HNSW_FLAT "Include faiss' hnsw algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT "Include cuVS ivf flat algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ "Include cuVS ivf sq algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ "Include cuVS ivf pq algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_CUVS_CAGRA "Include cuVS CAGRA in benchmark" ON)
option(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE "Include cuVS brute force knn in benchmark" ON)
Expand Down Expand Up @@ -80,6 +81,7 @@ set(CUVS_USE_FAISS_STATIC ON)
if(BUILD_CPU_ONLY)
set(CUVS_FAISS_ENABLE_GPU OFF)
set(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT OFF)
set(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ OFF)
set(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OFF)
set(CUVS_ANN_BENCH_USE_CUVS_CAGRA OFF)
set(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE OFF)
Expand All @@ -97,6 +99,7 @@ set(CUVS_ANN_BENCH_USE_CUVS OFF)
if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ
OR CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE
OR CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT
OR CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
OR CUVS_ANN_BENCH_USE_CUVS_CAGRA
OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB
OR CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE
Expand Down Expand Up @@ -244,6 +247,12 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT)
)
endif()

# Configure the cuVS IVF-SQ benchmark when CUVS_ANN_BENCH_USE_CUVS_IVF_SQ is enabled.
# Sources: cuvs_benchmark.cu plus cuvs_ivf_sq.cu; the target links against cuvs.
if(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ)
ConfigureAnnBench(
NAME CUVS_IVF_SQ PATH src/cuvs/cuvs_benchmark.cu src/cuvs/cuvs_ivf_sq.cu LINKS cuvs
)
endif()

if(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE)
ConfigureAnnBench(NAME CUVS_BRUTE_FORCE PATH src/cuvs/cuvs_benchmark.cu LINKS cuvs)
endif()
Expand Down
25 changes: 25 additions & 0 deletions cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ extern template class cuvs::bench::cuvs_cagra<uint8_t, uint32_t>;
extern template class cuvs::bench::cuvs_cagra<int8_t, uint32_t>;
#endif

// IVF-SQ wrapper: only available when the IVF-SQ benchmark is enabled at configure time.
#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
#include "cuvs_ivf_sq_wrapper.h"
// `extern template` suppresses implicit instantiation in translation units including this
// header; the wrapper is declared here for float and half element types only.
extern template class cuvs::bench::cuvs_ivf_sq<float>;
extern template class cuvs::bench::cuvs_ivf_sq<half>;
#endif
#ifdef CUVS_ANN_BENCH_USE_CUVS_MG
#include "cuvs_ivf_flat_wrapper.h"
#include "cuvs_mg_ivf_flat_wrapper.h"
Expand Down Expand Up @@ -86,6 +91,26 @@ void parse_search_param(const nlohmann::json& conf,
}
#endif

#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
/**
 * @brief Fill IVF-SQ build parameters from a benchmark JSON configuration.
 *
 * Recognised keys:
 *  - "nlist" (required): assigned to `n_lists`; `at()` throws when the key is absent.
 *  - "niter" (optional): assigned to `kmeans_n_iters`.
 *  - "ratio" (optional): its reciprocal is assigned to `kmeans_trainset_fraction`.
 *
 * @param[in]  conf  JSON object holding the build configuration.
 * @param[out] param Build parameters updated in place; fields whose key is absent are untouched.
 */
template <typename T>
void parse_build_param(const nlohmann::json& conf,
                       typename cuvs::bench::cuvs_ivf_sq<T>::build_param& param)
{
  param.n_lists = conf.at("nlist");

  if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); }

  if (conf.contains("ratio")) {
    // "ratio" is the inverse of the training-set fraction.
    const double ratio             = static_cast<double>(conf.at("ratio"));
    param.kmeans_trainset_fraction = 1.0 / ratio;
  }
}

/**
 * @brief Fill IVF-SQ search parameters from a benchmark JSON configuration.
 *
 * @param[in]  conf  JSON object; the "nprobe" key is required (`at()` throws when it is absent).
 * @param[out] param Search parameters; only `ivf_sq_params.n_probes` is written.
 */
template <typename T>
void parse_search_param(const nlohmann::json& conf,
                        typename cuvs::bench::cuvs_ivf_sq<T>::search_param& param)
{
  auto& sq_params    = param.ivf_sq_params;
  sq_params.n_probes = conf.at("nprobe");
}
#endif

#if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || \
defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) || \
defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_DISKANN)
Expand Down
20 changes: 19 additions & 1 deletion cpp/bench/ann/src/cuvs/cuvs_benchmark.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -84,6 +84,15 @@ auto create_algo(const std::string& algo_name,
}
}
#endif
#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
if constexpr (std::is_same_v<T, float> || std::is_same_v<T, half>) {
if (algo_name == "cuvs_ivf_sq") {
typename cuvs::bench::cuvs_ivf_sq<T>::build_param param;
parse_build_param<T>(conf, param);
a = std::make_unique<cuvs::bench::cuvs_ivf_sq<T>>(metric, dim, param);
}
}
#endif
#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_PQ
if (algo_name == "raft_ivf_pq" || algo_name == "cuvs_ivf_pq") {
typename cuvs::bench::cuvs_ivf_pq<T, int64_t>::build_param param;
Expand Down Expand Up @@ -151,6 +160,15 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con
}
}
#endif
#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
if constexpr (std::is_same_v<T, float> || std::is_same_v<T, half>) {
if (algo_name == "cuvs_ivf_sq") {
auto param = std::make_unique<typename cuvs::bench::cuvs_ivf_sq<T>::search_param>();
parse_search_param<T>(conf, *param);
return param;
}
}
#endif
#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_PQ
if (algo_name == "raft_ivf_pq" || algo_name == "cuvs_ivf_pq") {
auto param = std::make_unique<typename cuvs::bench::cuvs_ivf_pq<T, int64_t>::search_param>();
Expand Down
10 changes: 10 additions & 0 deletions cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#include "cuvs_ivf_sq_wrapper.h"

namespace cuvs::bench {
// Explicit instantiation definitions of the IVF-SQ benchmark wrapper for the two element
// types it is built for: float and half.
template class cuvs_ivf_sq<float>;
template class cuvs_ivf_sq<half>;
} // namespace cuvs::bench
141 changes: 141 additions & 0 deletions cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

#include "../common/ann_types.hpp"
#include "cuvs_ann_bench_utils.h"

#include <cuvs/distance/distance.hpp>
#include <cuvs/neighbors/ivf_sq.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/device_resources.hpp>
#include <raft/core/logger.hpp>
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/util/cudart_utils.hpp>
#include <rmm/cuda_stream_pool.hpp>

#include <cassert>
#include <memory>
#include <string>
#include <type_traits>

namespace cuvs::bench {

/**
 * @brief Benchmark adapter exposing cuvs::neighbors::ivf_sq through the `algo<T>` interface.
 *
 * The wrapped index type is `ivf_sq::index<uint8_t>` regardless of the element type `T`.
 *
 * @tparam T Element type of the dataset and query vectors.
 */
template <typename T>
class cuvs_ivf_sq : public algo<T>, public algo_gpu {
 public:
  using search_param_base = typename algo<T>::search_param;

  /** Build-time parameters are the library's own index parameters. */
  using build_param = cuvs::neighbors::ivf_sq::index_params;

  /** Search-time parameters: the benchmark base type plus the library's search parameters. */
  struct search_param : public search_param_base {
    cuvs::neighbors::ivf_sq::search_params ivf_sq_params;
  };

  /**
   * @brief Store the build configuration and record the current CUDA device.
   *
   * @param metric Benchmark metric; converted with `parse_metric_type` and written over
   *               `param.metric` in the stored copy.
   * @param dim    Dimensionality of the vectors.
   * @param param  Build parameters; copied, then adjusted (metric and
   *               `conservative_memory_allocation = true`).
   */
  cuvs_ivf_sq(Metric metric, int dim, const build_param& param)
    : algo<T>(metric, dim), index_params_(param), dimension_(dim)
  {
    index_params_.conservative_memory_allocation = true;
    index_params_.metric                         = parse_metric_type(metric);
    RAFT_CUDA_TRY(cudaGetDevice(&device_));
  }

  /** Build the index from `nrow` rows starting at `dataset`. */
  void build(const T* dataset, size_t nrow) final;

  /** Install the search parameters and the optional filter bitset used by `search`. */
  void set_search_param(const search_param_base& param, const void* filter_bitset) override;

  /** Search `batch_size` queries for `k` neighbors each, writing into the output buffers. */
  void search(const T* queries,
              int batch_size,
              int k,
              algo_base::index_type* neighbors,
              float* distances) const override;

  /** Stream the benchmark harness synchronizes on; forwarded from the resources handle. */
  [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override
  {
    return handle_.get_sync_stream();
  }

  /** Memory placement requested from the harness: host-mmap dataset, device queries. */
  [[nodiscard]] auto get_preference() const -> algo_property override
  {
    algo_property prefs;
    prefs.query_memory_type   = MemoryType::kDevice;
    prefs.dataset_memory_type = MemoryType::kHostMmap;
    return prefs;
  }

  /** Serialize the index to `file`. */
  void save(const std::string& file) const override;
  /** Replace the index with one deserialized from the given file. */
  void load(const std::string&) override;
  /** Copy-construct a new wrapper from this one. */
  std::unique_ptr<algo<T>> copy() override;

 private:
  // Member order is significant: it fixes the construction order.
  configured_raft_resources handle_{};
  build_param index_params_;
  cuvs::neighbors::ivf_sq::search_params search_params_;
  std::shared_ptr<cuvs::neighbors::ivf_sq::index<uint8_t>> index_;
  int device_;     // filled by cudaGetDevice in the constructor
  int dimension_;  // vector dimensionality passed to the constructor

  std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
};

/**
 * @brief Build the IVF-SQ index and store it in `index_`.
 *
 * A single-stream CUDA stream pool is attached to the resources handle before building.
 * The dataset pointer is wrapped as a host matrix view of shape (nrow, dimension_).
 *
 * @param dataset Pointer to the first element of the dataset.
 * @param nrow    Number of rows (vectors) in the dataset.
 */
template <typename T>
void cuvs_ivf_sq<T>::build(const T* dataset, size_t nrow)
{
  const size_t n_streams = 1;
  raft::resource::set_cuda_stream_pool(handle_, std::make_shared<rmm::cuda_stream_pool>(n_streams));

  // The build result is already a prvalue, so it is passed straight through: wrapping it in
  // std::move would be redundant and would prevent the temporary from being elided.
  index_ = std::make_shared<cuvs::neighbors::ivf_sq::index<uint8_t>>(
    cuvs::neighbors::ivf_sq::build(
      handle_,
      index_params_,
      raft::make_host_matrix_view<const T, int64_t>(dataset, nrow, dimension_)));
}

/**
 * @brief Install search parameters and the filter used by subsequent `search` calls.
 *
 * @param param         Must refer to a `cuvs_ivf_sq<T>::search_param`; the reference
 *                      `dynamic_cast` throws `std::bad_cast` otherwise.
 * @param filter_bitset Opaque bitset pointer forwarded to `make_cuvs_filter` together with
 *                      the index size.
 *
 * @note Dereferences `index_`, so an index must already be present.
 */
template <typename T>
void cuvs_ivf_sq<T>::set_search_param(const search_param_base& param, const void* filter_bitset)
{
  filter_ = make_cuvs_filter(filter_bitset, index_->size());

  // Bind by reference: plain `auto` would decay the cast result to a value and copy the
  // whole parameter object just to read one member.
  const auto& sp = dynamic_cast<const search_param&>(param);
  search_params_ = sp.ivf_sq_params;

  // Debug-only sanity check: probing more lists than the index was configured with.
  assert(search_params_.n_probes <= index_params_.n_lists);
}

/**
 * @brief Serialize the current index to a file.
 *
 * @param file Destination path handed to `ivf_sq::serialize`.
 */
template <typename T>
void cuvs_ivf_sq<T>::save(const std::string& file) const
{
  auto& current_index = *index_;
  cuvs::neighbors::ivf_sq::serialize(handle_, file, current_index);
}

/**
 * @brief Replace the current index with one deserialized from a file.
 *
 * A fresh index is first constructed from the stored build parameters and dimensionality
 * and assigned to `index_`; `ivf_sq::deserialize` then fills it in place.
 *
 * @param file Source path handed to `ivf_sq::deserialize`.
 */
template <typename T>
void cuvs_ivf_sq<T>::load(const std::string& file)
{
  using sq_index_t = cuvs::neighbors::ivf_sq::index<uint8_t>;

  index_ = std::make_shared<sq_index_t>(handle_, index_params_, this->dim_);
  cuvs::neighbors::ivf_sq::deserialize(handle_, file, index_.get());
}

/**
 * @brief Create a new wrapper by copy-constructing from this one.
 *
 * @return Owning pointer to the copy, typed as the `algo<T>` base.
 */
template <typename T>
std::unique_ptr<algo<T>> cuvs_ivf_sq<T>::copy()
{
  // Inside the member body the injected class name denotes cuvs_ivf_sq<T>.
  return std::make_unique<cuvs_ivf_sq>(*this);
}

/**
 * @brief Run an IVF-SQ search for a batch of queries.
 *
 * The raw pointers are wrapped as device matrix views: queries as (batch_size, index dim),
 * neighbors and distances as (batch_size, k). The filter installed by `set_search_param`
 * is applied.
 *
 * @param queries    Pointer to the query vectors.
 * @param batch_size Number of queries.
 * @param k          Number of neighbors requested per query.
 * @param neighbors  Output buffer for neighbor indices, reinterpreted as `int64_t`.
 * @param distances  Output buffer for neighbor distances.
 */
template <typename T>
void cuvs_ivf_sq<T>::search(
  const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
{
  // The reinterpret_cast below is only sound when both index types have the same width.
  static_assert(sizeof(algo_base::index_type) == sizeof(int64_t));

  auto queries_view =
    raft::make_device_matrix_view<const T, int64_t>(queries, batch_size, index_->dim());
  auto neighbors_view = raft::make_device_matrix_view<int64_t, int64_t>(
    reinterpret_cast<int64_t*>(neighbors), batch_size, k);
  auto distances_view = raft::make_device_matrix_view<float, int64_t>(distances, batch_size, k);

  cuvs::neighbors::ivf_sq::search(
    handle_, search_params_, *index_, queries_view, neighbors_view, distances_view, *filter_);
}

} // namespace cuvs::bench
Loading
Loading