diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 22a25c24d0..980cb78f29 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -89,6 +89,7 @@ add_library( src/neighbors/brute_force.cpp src/neighbors/ivf_flat.cpp src/neighbors/ivf_pq.cpp + src/neighbors/ivf_sq.cpp src/neighbors/cagra.cpp $<$:src/neighbors/hnsw.cpp> $<$:src/neighbors/mg_ivf_pq.cpp> diff --git a/c/include/cuvs/core/all.h b/c/include/cuvs/core/all.h index cc83684925..0847cc63da 100644 --- a/c/include/cuvs/core/all.h +++ b/c/include/cuvs/core/all.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/c/include/cuvs/neighbors/ivf_sq.h b/c/include/cuvs/neighbors/ivf_sq.h new file mode 100644 index 0000000000..6b312443b0 --- /dev/null +++ b/c/include/cuvs/neighbors/ivf_sq.h @@ -0,0 +1,373 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @defgroup ivf_sq_c_index_params IVF-SQ index build parameters + * @{ + */ +/** + * @brief Supplemental parameters to build IVF-SQ Index + * + */ +struct cuvsIvfSqIndexParams { + /** Distance type. */ + cuvsDistanceType metric; + /** The argument used by some distance metrics. */ + float metric_arg; + /** + * Whether to add the dataset content to the index, i.e.: + * + * - `true` means the index is filled with the dataset vectors and ready to search after calling + * `build`. + * - `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but + * the index is left empty; you'd need to call `extend` on the index afterwards to populate it. + */ + bool add_data_on_build; + /** The number of inverted lists (clusters) */ + uint32_t n_lists; + /** The number of iterations searching for kmeans centers (index building). */ + uint32_t kmeans_n_iters; + /** The fraction of data to use during iterative kmeans building. */ + double kmeans_trainset_fraction; + /** + * By default (adaptive_centers = false), the cluster centers are trained in `ivf_sq::build`, + * and never modified in `ivf_sq::extend`. As a result, you may need to retrain the index + * from scratch after invoking (`ivf_sq::extend`) a few times with new data, the distribution of + * which is no longer representative of the original training set. + * + * The alternative behavior (adaptive_centers = true) is to update the cluster centers for new + * data when it is added. In this case, `index.centers()` are always exactly the centroids of the + * data in the corresponding clusters. The drawback of this behavior is that the centroids depend + * on the order of adding new data (through the classification of the added data); that is, + * `index.centers()` "drift" together with the changing distribution of the newly added data. + */ + bool adaptive_centers; + /** + * By default, the algorithm allocates more space than necessary for individual clusters + * (`list_data`). This allows to amortize the cost of memory allocation and reduce the number of + * data copies during repeated calls to `extend` (extending the database). + * + * The alternative is the conservative allocation behavior; when enabled, the algorithm always + * allocates the minimum amount of memory required to store the given number of records. Set this + * flag to `true` if you prefer to use as little GPU memory for the database as possible. + */ + bool conservative_memory_allocation; +}; + +typedef struct cuvsIvfSqIndexParams* cuvsIvfSqIndexParams_t; + +/** + * @brief Allocate IVF-SQ Index params, and populate with default values + * + * @param[in] index_params cuvsIvfSqIndexParams_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* index_params); + +/** + * @brief De-allocate IVF-SQ Index params + * + * @param[in] index_params + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index_params); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_search_params IVF-SQ index search parameters + * @{ + */ +/** + * @brief Supplemental parameters to search IVF-SQ index + * + */ +struct cuvsIvfSqSearchParams { + /** The number of clusters to search. */ + uint32_t n_probes; +}; + +typedef struct cuvsIvfSqSearchParams* cuvsIvfSqSearchParams_t; + +/** + * @brief Allocate IVF-SQ search params, and populate with default values + * + * @param[in] params cuvsIvfSqSearchParams_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params); + +/** + * @brief De-allocate IVF-SQ search params + * + * @param[in] params + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index IVF-SQ index + * @{ + */ +/** + * @brief Struct to hold address of cuvs::neighbors::ivf_sq::index and its active trained dtype + * + */ +typedef struct { + uintptr_t addr; + DLDataType dtype; +} cuvsIvfSqIndex; + +typedef cuvsIvfSqIndex* cuvsIvfSqIndex_t; + +/** + * @brief Allocate IVF-SQ index + * + * @param[in] index cuvsIvfSqIndex_t to allocate + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index); + +/** + * @brief De-allocate IVF-SQ index + * + * @param[in] index cuvsIvfSqIndex_t to de-allocate + */ +cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index); + +/** Get the number of clusters/inverted lists */ +cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists); + +/** Get the dimensionality of the data */ +cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim); + +/** Get the size of the index */ +cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size); + +/** + * @brief Get the cluster centers corresponding to the lists [n_lists, dim] + * + * @param[in] index cuvsIvfSqIndex_t Built Ivf-SQ Index + * @param[out] centers Preallocated array on host or device memory to store output, [n_lists, dim] + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_build IVF-SQ index build + * @{ + */ +/** + * @brief Build an IVF-SQ index with a `DLManagedTensor` which has underlying + * `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, + * or `kDLCPU`. Also, acceptable underlying types are: + * 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16` + * + * @code {.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor dataset; + * + * // Create default index params + * cuvsIvfSqIndexParams_t index_params; + * cuvsError_t params_create_status = cuvsIvfSqIndexParamsCreate(&index_params); + * + * // Create IVF-SQ index + * cuvsIvfSqIndex_t index; + * cuvsError_t index_create_status = cuvsIvfSqIndexCreate(&index); + * + * // Build the IVF-SQ Index + * cuvsError_t build_status = cuvsIvfSqBuild(res, index_params, &dataset, index); + * + * // de-allocate `index_params`, `index` and `res` + * cuvsError_t params_destroy_status = cuvsIvfSqIndexParamsDestroy(index_params); + * cuvsError_t index_destroy_status = cuvsIvfSqIndexDestroy(index); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] index_params cuvsIvfSqIndexParams_t used to build IVF-SQ index + * @param[in] dataset DLManagedTensor* training dataset + * @param[out] index cuvsIvfSqIndex_t Newly built IVF-SQ index + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqBuild(cuvsResources_t res, + cuvsIvfSqIndexParams_t index_params, + DLManagedTensor* dataset, + cuvsIvfSqIndex_t index); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_search IVF-SQ index search + * @{ + */ +/** + * @brief Search an IVF-SQ index with a `DLManagedTensor` which has underlying + * `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. + * Types for input are: + * 1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or 16 + * 2. `neighbors`: `kDLDataType.code == kDLInt` and `kDLDataType.bits = 64` + * 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * + * @code {.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor queries; + * DLManagedTensor neighbors; + * DLManagedTensor distances; + * + * // Create default search params + * cuvsIvfSqSearchParams_t search_params; + * cuvsError_t params_create_status = cuvsIvfSqSearchParamsCreate(&search_params); + * + * // Search the `index` built using `cuvsIvfSqBuild` + * cuvsError_t search_status = cuvsIvfSqSearch(res, search_params, index, &queries, &neighbors, + * &distances); + * + * // de-allocate `search_params` and `res` + * cuvsError_t params_destroy_status = cuvsIvfSqSearchParamsDestroy(search_params); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] search_params cuvsIvfSqSearchParams_t used to search IVF-SQ index + * @param[in] index ivfSqIndex which has been returned by `cuvsIvfSqBuild` + * @param[in] queries DLManagedTensor* queries dataset to search + * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries + * @param[out] distances DLManagedTensor* output `k` distances for queries + */ +cuvsError_t cuvsIvfSqSearch(cuvsResources_t res, + cuvsIvfSqSearchParams_t search_params, + cuvsIvfSqIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances); + +/** + * @brief Search an IVF-SQ index with filtering. + * + * Same as cuvsIvfSqSearch, but applies a pre-filter to exclude vectors during search. + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] search_params cuvsIvfSqSearchParams_t used to search IVF-SQ index + * @param[in] index ivfSqIndex which has been returned by `cuvsIvfSqBuild` + * @param[in] queries DLManagedTensor* queries dataset to search + * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries + * @param[out] distances DLManagedTensor* output `k` distances for queries + * @param[in] filter cuvsFilter to filter neighbors based on the given bitset + */ +cuvsError_t cuvsIvfSqSearchWithFilter(cuvsResources_t res, + cuvsIvfSqSearchParams_t search_params, + cuvsIvfSqIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances, + cuvsFilter filter); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_serialize IVF-SQ C-API serialize functions + * @{ + */ +/** + * Save the index to file. + * + * Experimental, both the API and the serialization format are subject to change. + * + * @code{.c} + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // create an index with `cuvsIvfSqBuild` + * cuvsIvfSqSerialize(res, "/path/to/index", index); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the file name for saving the index + * @param[in] index IVF-SQ index + */ +cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex_t index); + +/** + * Load index from file. + * + * Experimental, both the API and the serialization format are subject to change. + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] filename the name of the file that stores the index + * @param[out] index IVF-SQ index loaded from disk + */ +cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res, + const char* filename, + cuvsIvfSqIndex_t index); +/** + * @} + */ + +/** + * @defgroup ivf_sq_c_index_extend IVF-SQ index extend + * @{ + */ +/** + * @brief Extend the index with the new data. + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] new_vectors DLManagedTensor* the new vectors to add to the index + * @param[in] new_indices DLManagedTensor* vector of new indices for the new vectors + * @param[inout] index IVF-SQ index to be extended + * @return cuvsError_t + */ +cuvsError_t cuvsIvfSqExtend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex_t index); +/** + * @} + */ +#ifdef __cplusplus +} +#endif diff --git a/c/src/neighbors/ivf_sq.cpp b/c/src/neighbors/ivf_sq.cpp new file mode 100644 index 0000000000..eadf84a299 --- /dev/null +++ b/c/src/neighbors/ivf_sq.cpp @@ -0,0 +1,363 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "../core/exceptions.hpp" +#include "../core/interop.hpp" + +namespace cuvs::neighbors::ivf_sq { +void convert_c_index_params(cuvsIvfSqIndexParams params, + cuvs::neighbors::ivf_sq::index_params* out) +{ + out->metric = static_cast((int)params.metric); + out->metric_arg = params.metric_arg; + out->add_data_on_build = params.add_data_on_build; + out->n_lists = params.n_lists; + out->kmeans_n_iters = params.kmeans_n_iters; + out->kmeans_trainset_fraction = params.kmeans_trainset_fraction; + out->adaptive_centers = params.adaptive_centers; + out->conservative_memory_allocation = params.conservative_memory_allocation; +} +void convert_c_search_params(cuvsIvfSqSearchParams params, + cuvs::neighbors::ivf_sq::search_params* out) +{ + out->n_probes = params.n_probes; +} +} // namespace cuvs::neighbors::ivf_sq + +namespace { + +template +void* _build(cuvsResources_t res, cuvsIvfSqIndexParams params, DLManagedTensor* dataset_tensor) +{ + auto res_ptr = reinterpret_cast(res); + + auto build_params = cuvs::neighbors::ivf_sq::index_params(); + cuvs::neighbors::ivf_sq::convert_c_index_params(params, &build_params); + + auto dataset = dataset_tensor->dl_tensor; + auto dim = dataset.shape[1]; + + auto index = new cuvs::neighbors::ivf_sq::index(*res_ptr, build_params, dim); + + if (cuvs::core::is_dlpack_device_compatible(dataset)) { + using mdspan_type = raft::device_matrix_view; + auto mds = cuvs::core::from_dlpack(dataset_tensor); + cuvs::neighbors::ivf_sq::build(*res_ptr, build_params, mds, *index); + } else { + using mdspan_type = raft::host_matrix_view; + auto mds = cuvs::core::from_dlpack(dataset_tensor); + cuvs::neighbors::ivf_sq::build(*res_ptr, build_params, mds, *index); + } + + return index; +} + +template +void _search(cuvsResources_t res, + cuvsIvfSqSearchParams params, + cuvsIvfSqIndex index, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor, + cuvsFilter* filter) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + + auto search_params = cuvs::neighbors::ivf_sq::search_params(); + cuvs::neighbors::ivf_sq::convert_c_search_params(params, &search_params); + + using queries_mdspan_type = raft::device_matrix_view; + using neighbors_mdspan_type = raft::device_matrix_view; + using distances_mdspan_type = raft::device_matrix_view; + auto queries_mds = cuvs::core::from_dlpack(queries_tensor); + auto neighbors_mds = cuvs::core::from_dlpack(neighbors_tensor); + auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + + if (filter == nullptr || filter->type == NO_FILTER) { + cuvs::neighbors::ivf_sq::search( + *res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds); + } else if (filter->type == BITSET) { + using filter_mdspan_type = raft::device_vector_view; + auto removed_indices_tensor = reinterpret_cast(filter->addr); + auto removed_indices = cuvs::core::from_dlpack(removed_indices_tensor); + cuvs::core::bitset_view removed_indices_bitset(removed_indices, + index_ptr->size()); + auto bitset_filter_obj = cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset); + cuvs::neighbors::ivf_sq::search(*res_ptr, + search_params, + *index_ptr, + queries_mds, + neighbors_mds, + distances_mds, + bitset_filter_obj); + } else { + RAFT_FAIL("Unsupported filter type: BITMAP"); + } +} + +void _serialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex index) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + cuvs::neighbors::ivf_sq::serialize(*res_ptr, std::string(filename), *index_ptr); +} + +void* _deserialize(cuvsResources_t res, const char* filename) +{ + auto res_ptr = reinterpret_cast(res); + auto index = new cuvs::neighbors::ivf_sq::index(*res_ptr); + cuvs::neighbors::ivf_sq::deserialize(*res_ptr, std::string(filename), index); + return index; +} + +template +void _extend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex index) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + + bool on_device = cuvs::core::is_dlpack_device_compatible(new_vectors->dl_tensor); + if (on_device != cuvs::core::is_dlpack_device_compatible(new_indices->dl_tensor)) { + RAFT_FAIL("extend inputs must both either be on device memory or host memory"); + } + + if (on_device) { + using vectors_mdspan_type = raft::device_matrix_view; + using indices_mdspan_type = raft::device_vector_view; + auto vectors_mds = cuvs::core::from_dlpack(new_vectors); + auto indices_mds = cuvs::core::from_dlpack(new_indices); + cuvs::neighbors::ivf_sq::extend(*res_ptr, vectors_mds, indices_mds, index_ptr); + } else { + using vectors_mdspan_type = raft::host_matrix_view; + using indices_mdspan_type = raft::host_vector_view; + auto vectors_mds = cuvs::core::from_dlpack(new_vectors); + auto indices_mds = cuvs::core::from_dlpack(new_indices); + cuvs::neighbors::ivf_sq::extend(*res_ptr, vectors_mds, indices_mds, index_ptr); + } +} + +void _get_centers(cuvsIvfSqIndex index, DLManagedTensor* centers) +{ + auto index_ptr = reinterpret_cast*>(index.addr); + cuvs::core::to_dlpack(index_ptr->centers(), centers); +} +} // namespace + +extern "C" cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index) +{ + return cuvs::core::translate_exceptions([=] { *index = new cuvsIvfSqIndex{}; }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index_c_ptr) +{ + return cuvs::core::translate_exceptions([=] { + auto index = *index_c_ptr; + auto index_ptr = reinterpret_cast*>(index.addr); + delete index_ptr; + delete index_c_ptr; + }); +} + +extern "C" cuvsError_t cuvsIvfSqBuild(cuvsResources_t res, + cuvsIvfSqIndexParams_t params, + DLManagedTensor* dataset_tensor, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + auto dataset = dataset_tensor->dl_tensor; + + index->dtype.code = dataset.dtype.code; + index->dtype.bits = dataset.dtype.bits; + + if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { + index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); + } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 16) { + index->addr = reinterpret_cast(_build(res, *params, dataset_tensor)); + } else { + RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", + dataset.dtype.code, + dataset.dtype.bits); + } + }); +} + +static cuvsError_t _cuvsIvfSqSearchImpl(cuvsResources_t res, + cuvsIvfSqSearchParams_t params, + cuvsIvfSqIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor, + cuvsFilter* filter) +{ + return cuvs::core::translate_exceptions([=] { + auto queries = queries_tensor->dl_tensor; + auto neighbors = neighbors_tensor->dl_tensor; + auto distances = distances_tensor->dl_tensor; + + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(queries), + "queries should have device compatible memory"); + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(neighbors), + "neighbors should have device compatible memory"); + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(distances), + "distances should have device compatible memory"); + + RAFT_EXPECTS(neighbors.dtype.code == kDLInt && neighbors.dtype.bits == 64, + "neighbors should be of type int64_t"); + RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32, + "distances should be of type float32"); + + auto index = *index_c_ptr; + if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) { + _search( + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter); + } else if (queries.dtype.code == kDLFloat && queries.dtype.bits == 16) { + _search( + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter); + } else { + RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d", + queries.dtype.code, + queries.dtype.bits); + } + }); +} + +extern "C" cuvsError_t cuvsIvfSqSearch(cuvsResources_t res, + cuvsIvfSqSearchParams_t params, + cuvsIvfSqIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor) +{ + return _cuvsIvfSqSearchImpl( + res, params, index_c_ptr, queries_tensor, neighbors_tensor, distances_tensor, nullptr); +} + +extern "C" cuvsError_t cuvsIvfSqSearchWithFilter(cuvsResources_t res, + cuvsIvfSqSearchParams_t params, + cuvsIvfSqIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor, + cuvsFilter filter) +{ + return _cuvsIvfSqSearchImpl( + res, params, index_c_ptr, queries_tensor, neighbors_tensor, distances_tensor, &filter); +} + +extern "C" cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* params) +{ + return cuvs::core::translate_exceptions([=] { + *params = new cuvsIvfSqIndexParams{.metric = L2Expanded, + .metric_arg = 2.0f, + .add_data_on_build = true, + .n_lists = 1024, + .kmeans_n_iters = 20, + .kmeans_trainset_fraction = 0.5, + .adaptive_centers = false, + .conservative_memory_allocation = false}; + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t params) +{ + return cuvs::core::translate_exceptions([=] { delete params; }); +} + +extern "C" cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params) +{ + return cuvs::core::translate_exceptions( + [=] { *params = new cuvsIvfSqSearchParams{.n_probes = 20}; }); +} + +extern "C" cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params) +{ + return cuvs::core::translate_exceptions([=] { delete params; }); +} + +extern "C" cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res, + const char* filename, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions( + [=] { index->addr = reinterpret_cast(_deserialize(res, filename)); }); +} + +extern "C" cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, + const char* filename, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { _serialize(res, filename, *index); }); +} + +extern "C" cuvsError_t cuvsIvfSqExtend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex_t index) +{ + return cuvs::core::translate_exceptions([=] { + auto vectors = new_vectors->dl_tensor; + + if (index->dtype.code == 0 && index->dtype.bits == 0) { + index->dtype.code = vectors.dtype.code; + index->dtype.bits = vectors.dtype.bits; + } + + if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 32) { + _extend(res, new_vectors, new_indices, *index); + } else if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 16) { + _extend(res, new_vectors, new_indices, *index); + } else { + RAFT_FAIL( + "Unsupported vectors DLtensor dtype: %d and bits: %d", vectors.dtype.code, vectors.dtype.bits); + } + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists) +{ + return cuvs::core::translate_exceptions([=] { + auto index_ptr = + reinterpret_cast*>(index->addr); + *n_lists = index_ptr->n_lists(); + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim) +{ + return cuvs::core::translate_exceptions([=] { + auto index_ptr = + reinterpret_cast*>(index->addr); + *dim = index_ptr->dim(); + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size) +{ + return cuvs::core::translate_exceptions([=] { + auto index_ptr = + reinterpret_cast*>(index->addr); + *size = index_ptr->size(); + }); +} + +extern "C" cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers) +{ + return cuvs::core::translate_exceptions([=] { _get_centers(*index, centers); }); +} diff --git a/c/src/neighbors/ivf_sq.hpp b/c/src/neighbors/ivf_sq.hpp new file mode 100644 index 0000000000..3a08bc689a --- /dev/null +++ b/c/src/neighbors/ivf_sq.hpp @@ -0,0 +1,14 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include + +namespace cuvs::neighbors::ivf_sq { +/// Converts a cuvsIvfSqIndexParams struct (c) to a ivf_sq::index_params (C++) struct +void convert_c_index_params(cuvsIvfSqIndexParams params, + cuvs::neighbors::ivf_sq::index_params* out); +void convert_c_search_params(cuvsIvfSqSearchParams params, + cuvs::neighbors::ivf_sq::search_params* out); +} // namespace cuvs::neighbors::ivf_sq diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt index 6d52e5b174..9c96fc4120 100644 --- a/c/tests/CMakeLists.txt +++ b/c/tests/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -77,6 +77,7 @@ ConfigureTest( ConfigureTest(NAME BRUTEFORCE_C_TEST PATH neighbors/run_brute_force_c.c neighbors/brute_force_c.cu) ConfigureTest(NAME IVF_FLAT_C_TEST PATH neighbors/run_ivf_flat_c.c neighbors/ann_ivf_flat_c.cu) ConfigureTest(NAME IVF_PQ_C_TEST PATH neighbors/run_ivf_pq_c.c neighbors/ann_ivf_pq_c.cu) +ConfigureTest(NAME IVF_SQ_C_TEST PATH neighbors/run_ivf_sq_c.c neighbors/ann_ivf_sq_c.cu) ConfigureTest(NAME CAGRA_C_TEST PATH neighbors/ann_cagra_c.cu) ConfigureTest(NAME MG_C_TEST PATH neighbors/run_mg_c.c neighbors/ann_mg_c.cu) ConfigureTest( diff --git a/c/tests/neighbors/ann_ivf_sq_c.cu b/c/tests/neighbors/ann_ivf_sq_c.cu new file mode 100644 index 0000000000..c36786e45a --- /dev/null +++ b/c/tests/neighbors/ann_ivf_sq_c.cu @@ -0,0 +1,130 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include + +#include "neighbors/ann_utils.cuh" +#include + +extern "C" void run_ivf_sq(int64_t n_rows, + int64_t n_queries, + int64_t n_dim, + uint32_t n_neighbors, + float* index_data, + float* query_data, + float* distances_data, + int64_t* neighbors_data, + cuvsDistanceType metric, + size_t n_probes, + size_t n_lists); + +template +void generate_random_data(T* devPtr, size_t size) +{ + raft::handle_t handle; + raft::random::RngState r(1234ULL); + raft::random::uniform(handle, r, devPtr, size, T(0.1), T(2.0)); +}; + +template +void recall_eval(T* query_data, + T* index_data, + IdxT* neighbors, + T* distances, + size_t n_queries, + size_t n_rows, + size_t n_dim, + size_t n_neighbors, + cuvsDistanceType metric, + size_t n_probes, + size_t n_lists) +{ + raft::handle_t handle; + auto distances_ref = raft::make_device_matrix(handle, n_queries, n_neighbors); + auto neighbors_ref = raft::make_device_matrix(handle, n_queries, n_neighbors); + cuvs::neighbors::naive_knn( + handle, + distances_ref.data_handle(), + neighbors_ref.data_handle(), + query_data, + index_data, + n_queries, + n_rows, + n_dim, + n_neighbors, + static_cast((uint16_t)metric)); + + size_t size = n_queries * n_neighbors; + std::vector neighbors_h(size); + std::vector distances_h(size); + std::vector neighbors_ref_h(size); + std::vector distances_ref_h(size); + + auto stream = raft::resource::get_cuda_stream(handle); + raft::copy(neighbors_h.data(), neighbors, size, stream); + raft::copy(distances_h.data(), distances, size, stream); + raft::copy(neighbors_ref_h.data(), neighbors_ref.data_handle(), size, stream); + raft::copy(distances_ref_h.data(), distances_ref.data_handle(), size, stream); + + double min_recall = static_cast(n_probes) / static_cast(n_lists); + ASSERT_TRUE(cuvs::neighbors::eval_neighbours(neighbors_ref_h, + neighbors_h, + distances_ref_h, + distances_h, + n_queries, + n_neighbors, + 0.001, + min_recall)); +}; + +TEST(IvfSqC, BuildSearch) +{ + int64_t n_rows = 8096; + int64_t n_queries = 128; + int64_t n_dim = 32; + uint32_t n_neighbors = 8; + + raft::handle_t handle; + auto stream = raft::resource::get_cuda_stream(handle); + + cuvsDistanceType metric = L2Expanded; + size_t n_probes = 20; + size_t n_lists = 1024; + + rmm::device_uvector index_data(n_rows * n_dim, stream); + rmm::device_uvector query_data(n_queries * n_dim, stream); + rmm::device_uvector neighbors_data(n_queries * n_neighbors, stream); + rmm::device_uvector distances_data(n_queries * n_neighbors, stream); + + generate_random_data(index_data.data(), n_rows * n_dim); + generate_random_data(query_data.data(), n_queries * n_dim); + + run_ivf_sq(n_rows, + n_queries, + n_dim, + n_neighbors, + index_data.data(), + query_data.data(), + distances_data.data(), + neighbors_data.data(), + metric, + n_probes, + n_lists); + + recall_eval(query_data.data(), + index_data.data(), + neighbors_data.data(), + distances_data.data(), + n_queries, + n_rows, + n_dim, + n_neighbors, + metric, + n_probes, + n_lists); +} diff --git a/c/tests/neighbors/c_api.c b/c/tests/neighbors/c_api.c index 6988aaf618..86108ea703 100644 --- a/c/tests/neighbors/c_api.c +++ b/c/tests/neighbors/c_api.c @@ -1,11 +1,12 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #include #include #include +#include #include #include @@ -47,6 +48,15 @@ void test_compile_tiered_index() cuvsTieredIndexExtend(resources, &dataset, tiered_index); } +void test_compile_ivf_sq() +{ + assert(!"test_compile_ivf_sq is not meant to be run"); + + cuvsIvfSqIndex_t index; + cuvsIvfSqIndexCreate(&index); + cuvsIvfSqIndexDestroy(index); +} + void test_compile_all_neighbors() { // Smoke test to ensure that the all_neighbors.h API compiles correctly @@ -66,6 +76,7 @@ int main() // These are smoke tests that check that the C-APIs compile with a C compiler. // These are not meant to be run. test_compile_cagra(); + test_compile_ivf_sq(); test_compile_tiered_index(); test_compile_all_neighbors(); diff --git a/c/tests/neighbors/run_ivf_sq_c.c b/c/tests/neighbors/run_ivf_sq_c.c new file mode 100644 index 0000000000..d07502abd6 --- /dev/null +++ b/c/tests/neighbors/run_ivf_sq_c.c @@ -0,0 +1,86 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +void run_ivf_sq(int64_t n_rows, + int64_t n_queries, + int64_t n_dim, + uint32_t n_neighbors, + float* index_data, + float* query_data, + float* distances_data, + int64_t* neighbors_data, + cuvsDistanceType metric, + size_t n_probes, + size_t n_lists) +{ + cuvsResources_t res; + cuvsResourcesCreate(&res); + + DLManagedTensor dataset_tensor; + dataset_tensor.dl_tensor.data = index_data; + dataset_tensor.dl_tensor.device.device_type = kDLCUDA; + dataset_tensor.dl_tensor.ndim = 2; + dataset_tensor.dl_tensor.dtype.code = kDLFloat; + dataset_tensor.dl_tensor.dtype.bits = 32; + dataset_tensor.dl_tensor.dtype.lanes = 1; + int64_t dataset_shape[2] = {n_rows, n_dim}; + dataset_tensor.dl_tensor.shape = dataset_shape; + dataset_tensor.dl_tensor.strides = NULL; + + cuvsIvfSqIndex_t index; + cuvsIvfSqIndexCreate(&index); + + cuvsIvfSqIndexParams_t build_params; + cuvsIvfSqIndexParamsCreate(&build_params); + build_params->metric = metric; + build_params->n_lists = n_lists; + cuvsIvfSqBuild(res, build_params, &dataset_tensor, index); + + DLManagedTensor queries_tensor; + queries_tensor.dl_tensor.data = (void*)query_data; + queries_tensor.dl_tensor.device.device_type = kDLCUDA; + queries_tensor.dl_tensor.ndim = 2; + queries_tensor.dl_tensor.dtype.code = kDLFloat; + queries_tensor.dl_tensor.dtype.bits = 32; + queries_tensor.dl_tensor.dtype.lanes = 1; + int64_t queries_shape[2] = {n_queries, n_dim}; + queries_tensor.dl_tensor.shape = queries_shape; + queries_tensor.dl_tensor.strides = NULL; + + DLManagedTensor neighbors_tensor; + neighbors_tensor.dl_tensor.data = (void*)neighbors_data; + neighbors_tensor.dl_tensor.device.device_type = kDLCUDA; + neighbors_tensor.dl_tensor.ndim = 2; + neighbors_tensor.dl_tensor.dtype.code = kDLInt; + neighbors_tensor.dl_tensor.dtype.bits = 64; + neighbors_tensor.dl_tensor.dtype.lanes = 1; + int64_t neighbors_shape[2] = {n_queries, n_neighbors}; + neighbors_tensor.dl_tensor.shape = neighbors_shape; + neighbors_tensor.dl_tensor.strides = NULL; + + DLManagedTensor distances_tensor; + distances_tensor.dl_tensor.data = (void*)distances_data; + distances_tensor.dl_tensor.device.device_type = kDLCUDA; + distances_tensor.dl_tensor.ndim = 2; + distances_tensor.dl_tensor.dtype.code = kDLFloat; + distances_tensor.dl_tensor.dtype.bits = 32; + distances_tensor.dl_tensor.dtype.lanes = 1; + int64_t distances_shape[2] = {n_queries, n_neighbors}; + distances_tensor.dl_tensor.shape = distances_shape; + distances_tensor.dl_tensor.strides = NULL; + + cuvsIvfSqSearchParams_t search_params; + cuvsIvfSqSearchParamsCreate(&search_params); + search_params->n_probes = n_probes; + cuvsIvfSqSearch( + res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor); + + cuvsIvfSqSearchParamsDestroy(search_params); + cuvsIvfSqIndexParamsDestroy(build_params); + cuvsIvfSqIndexDestroy(index); + cuvsResourcesDestroy(res); +} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d4e9f2c6b4..78fd606491 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -633,6 +633,12 @@ if(NOT BUILD_CPU_ONLY) src/neighbors/ivf_pq/detail/ivf_pq_transform_half_int64_t.cu src/neighbors/ivf_pq/detail/ivf_pq_transform_int8_t_int64_t.cu src/neighbors/ivf_pq/detail/ivf_pq_transform_uint8_t_int64_t.cu + src/neighbors/ivf_sq_index.cpp + src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu + src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu + src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu + src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu + src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu src/neighbors/knn_merge_parts.cu src/neighbors/nn_descent.cu src/neighbors/nn_descent_float.cu diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 4e4527267c..c377b64ec6 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -24,6 +24,7 @@ option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algori option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_FAISS_CPU_HNSW_FLAT "Include faiss' hnsw algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT "Include cuVS ivf flat algorithm in benchmark" ON) +option(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ "Include cuVS ivf sq algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ "Include cuVS ivf pq algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_CAGRA "Include cuVS CAGRA in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE "Include cuVS brute force knn in benchmark" ON) @@ -80,6 +81,7 @@ set(CUVS_USE_FAISS_STATIC ON) if(BUILD_CPU_ONLY) set(CUVS_FAISS_ENABLE_GPU OFF) set(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT OFF) + set(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ OFF) set(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OFF) set(CUVS_ANN_BENCH_USE_CUVS_CAGRA OFF) set(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE OFF) @@ -97,6 +99,7 @@ set(CUVS_ANN_BENCH_USE_CUVS OFF) if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OR CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE OR CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT + OR CUVS_ANN_BENCH_USE_CUVS_IVF_SQ OR CUVS_ANN_BENCH_USE_CUVS_CAGRA OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OR CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE @@ -244,6 +247,12 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT) ) endif() +if(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ) + ConfigureAnnBench( + NAME CUVS_IVF_SQ PATH src/cuvs/cuvs_benchmark.cu src/cuvs/cuvs_ivf_sq.cu LINKS cuvs + ) +endif() + if(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE) ConfigureAnnBench(NAME CUVS_BRUTE_FORCE PATH src/cuvs/cuvs_benchmark.cu LINKS cuvs) endif() diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h index 2eaf3123a0..6e7a6e2179 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h @@ -35,6 +35,11 @@ extern template class cuvs::bench::cuvs_cagra; extern template class cuvs::bench::cuvs_cagra; #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ +#include "cuvs_ivf_sq_wrapper.h" +extern template class cuvs::bench::cuvs_ivf_sq; +extern template class cuvs::bench::cuvs_ivf_sq; +#endif #ifdef CUVS_ANN_BENCH_USE_CUVS_MG #include "cuvs_ivf_flat_wrapper.h" #include "cuvs_mg_ivf_flat_wrapper.h" @@ -86,6 +91,26 @@ void parse_search_param(const nlohmann::json& conf, } #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ +template +void parse_build_param(const nlohmann::json& conf, + typename cuvs::bench::cuvs_ivf_sq::build_param& param) +{ + param.n_lists = conf.at("nlist"); + if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); } + if (conf.contains("ratio")) { + param.kmeans_trainset_fraction = 1.0 / static_cast(conf.at("ratio")); + } +} + +template +void parse_search_param(const nlohmann::json& conf, + typename cuvs::bench::cuvs_ivf_sq::search_param& param) +{ + param.ivf_sq_params.n_probes = conf.at("nprobe"); +} +#endif + #if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || \ defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) || \ defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_DISKANN) diff --git a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu index aebac654c2..22aeb31c38 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -84,6 +84,15 @@ auto create_algo(const std::string& algo_name, } } #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ + if constexpr (std::is_same_v || std::is_same_v) { + if (algo_name == "cuvs_ivf_sq") { + typename cuvs::bench::cuvs_ivf_sq::build_param param; + parse_build_param(conf, param); + a = std::make_unique>(metric, dim, param); + } + } +#endif #ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_PQ if (algo_name == "raft_ivf_pq" || algo_name == "cuvs_ivf_pq") { typename cuvs::bench::cuvs_ivf_pq::build_param param; @@ -151,6 +160,15 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con } } #endif +#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ + if constexpr (std::is_same_v || std::is_same_v) { + if (algo_name == "cuvs_ivf_sq") { + auto param = std::make_unique::search_param>(); + parse_search_param(conf, *param); + return param; + } + } +#endif #ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_PQ if (algo_name == "raft_ivf_pq" || algo_name == "cuvs_ivf_pq") { auto param = std::make_unique::search_param>(); diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu new file mode 100644 index 0000000000..ec41324c8d --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu @@ -0,0 +1,10 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#include "cuvs_ivf_sq_wrapper.h" + +namespace cuvs::bench { +template class cuvs_ivf_sq; +template class cuvs_ivf_sq; +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h new file mode 100644 index 0000000000..1503e6bb84 --- /dev/null +++ b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h @@ -0,0 +1,141 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include "../common/ann_types.hpp" +#include "cuvs_ann_bench_utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cuvs::bench { + +template +class cuvs_ivf_sq : public algo, public algo_gpu { + public: + using search_param_base = typename algo::search_param; + + struct search_param : public search_param_base { + cuvs::neighbors::ivf_sq::search_params ivf_sq_params; + }; + + using build_param = cuvs::neighbors::ivf_sq::index_params; + + cuvs_ivf_sq(Metric metric, int dim, const build_param& param) + : algo(metric, dim), index_params_(param), dimension_(dim) + { + index_params_.metric = parse_metric_type(metric); + index_params_.conservative_memory_allocation = true; + RAFT_CUDA_TRY(cudaGetDevice(&device_)); + } + + void build(const T* dataset, size_t nrow) final; + + void set_search_param(const search_param_base& param, const void* filter_bitset) override; + + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override; + + [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override + { + return handle_.get_sync_stream(); + } + + [[nodiscard]] auto get_preference() const -> algo_property override + { + algo_property property; + property.dataset_memory_type = MemoryType::kHostMmap; + property.query_memory_type = MemoryType::kDevice; + return property; + } + + void save(const std::string& file) const override; + void load(const std::string&) override; + std::unique_ptr> copy() override; + + private: + configured_raft_resources handle_{}; + build_param index_params_; + cuvs::neighbors::ivf_sq::search_params search_params_; + std::shared_ptr> index_; + int device_; + int dimension_; + + std::shared_ptr filter_; +}; + +template +void cuvs_ivf_sq::build(const T* dataset, size_t nrow) +{ + size_t n_streams = 1; + raft::resource::set_cuda_stream_pool(handle_, std::make_shared(n_streams)); + index_ = std::make_shared>( + std::move(cuvs::neighbors::ivf_sq::build( + handle_, + index_params_, + raft::make_host_matrix_view(dataset, nrow, dimension_)))); +} + +template +void cuvs_ivf_sq::set_search_param(const search_param_base& param, const void* filter_bitset) +{ + filter_ = make_cuvs_filter(filter_bitset, index_->size()); + auto sp = dynamic_cast(param); + search_params_ = sp.ivf_sq_params; + assert(search_params_.n_probes <= index_params_.n_lists); +} + +template +void cuvs_ivf_sq::save(const std::string& file) const +{ + cuvs::neighbors::ivf_sq::serialize(handle_, file, *index_); +} + +template +void cuvs_ivf_sq::load(const std::string& file) +{ + index_ = + std::make_shared>(handle_, index_params_, this->dim_); + cuvs::neighbors::ivf_sq::deserialize(handle_, file, index_.get()); +} + +template +std::unique_ptr> cuvs_ivf_sq::copy() +{ + return std::make_unique>(*this); +} + +template +void cuvs_ivf_sq::search( + const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const +{ + static_assert(sizeof(algo_base::index_type) == sizeof(int64_t)); + + cuvs::neighbors::ivf_sq::search( + handle_, + search_params_, + *index_, + raft::make_device_matrix_view(queries, batch_size, index_->dim()), + raft::make_device_matrix_view( + reinterpret_cast(neighbors), batch_size, k), + raft::make_device_matrix_view(distances, batch_size, k), + *filter_); +} + +} // namespace cuvs::bench diff --git a/cpp/include/cuvs/neighbors/ivf_sq.hpp b/cpp/include/cuvs/neighbors/ivf_sq.hpp new file mode 100644 index 0000000000..2f09751e95 --- /dev/null +++ b/cpp/include/cuvs/neighbors/ivf_sq.hpp @@ -0,0 +1,336 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "common.hpp" +#include +#include +#include +#include +#include +#include +#include + +namespace cuvs::neighbors::ivf_sq { + +/** + * @defgroup ivf_sq_cpp_index_params IVF-SQ index build parameters + * @{ + */ + +constexpr static uint32_t kIndexGroupSize = 32; + +struct index_params : cuvs::neighbors::index_params { + uint32_t n_lists = 1024; + uint32_t kmeans_n_iters = 20; + double kmeans_trainset_fraction = 0.5; + bool adaptive_centers = false; + bool conservative_memory_allocation = false; + bool add_data_on_build = true; +}; + +struct search_params : cuvs::neighbors::search_params { + uint32_t n_probes = 20; +}; + +static_assert(std::is_aggregate_v); +static_assert(std::is_aggregate_v); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_list_spec IVF-SQ list storage spec + * @{ + */ + +template +struct list_spec { + static_assert(std::is_same_v, "IVF-SQ code type IdxT must be uint8_t"); + + using value_type = IdxT; + using list_extents = raft::matrix_extent; + using index_type = ExtT; + + SizeT align_max; + SizeT align_min; + uint32_t dim; + + constexpr list_spec(uint32_t dim, bool conservative_memory_allocation) + : dim(dim), + align_min(kIndexGroupSize), + align_max(conservative_memory_allocation ? kIndexGroupSize : 1024) + { + } + + template + constexpr explicit list_spec(const list_spec& other_spec) + : dim{other_spec.dim}, align_min{other_spec.align_min}, align_max{other_spec.align_max} + { + } + + static constexpr uint32_t kVecLen = 16; + + constexpr auto make_list_extents(SizeT n_rows) const -> list_extents + { + uint32_t padded = ((dim + kVecLen - 1) / kVecLen) * kVecLen; + return raft::make_extents(n_rows, padded); + } +}; + +template +using list_data = ivf::list; + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_index IVF-SQ index + * @{ + */ + +/** + * @brief IVF-SQ index. + * + * @tparam IdxT SQ code type. Only uint8_t (8-bit, codes in [0,255]) for now. + * + * No member depends on the raw data type T (float, half). T appears only + * in the free-function signatures (build, search, extend) where input data + * is consumed, following the IVF-PQ pattern. + */ +template +struct index : cuvs::neighbors::index { + static_assert(std::is_same_v, "IVF-SQ code type IdxT must be uint8_t for now."); + + using index_params_type = ivf_sq::index_params; + using search_params_type = ivf_sq::search_params; + using code_type = IdxT; + + static constexpr uint32_t sq_bits = sizeof(IdxT) * 8; + + public: + index(const index&) = delete; + index(index&&) = default; + index& operator=(const index&) = delete; + index& operator=(index&&) = default; + ~index() = default; + + index(raft::resources const& res); + index(raft::resources const& res, const index_params& params, uint32_t dim); + index(raft::resources const& res, + cuvs::distance::DistanceType metric, + uint32_t n_lists, + uint32_t dim, + bool adaptive_centers, + bool conservative_memory_allocation); + + cuvs::distance::DistanceType metric() const noexcept; + bool adaptive_centers() const noexcept; + int64_t size() const noexcept; + uint32_t dim() const noexcept; + uint32_t n_lists() const noexcept; + bool conservative_memory_allocation() const noexcept; + + raft::device_vector_view list_sizes() noexcept; + raft::device_vector_view list_sizes() const noexcept; + + raft::device_matrix_view centers() noexcept; + raft::device_matrix_view centers() const noexcept; + + std::optional> center_norms() noexcept; + std::optional> center_norms() const noexcept; + void allocate_center_norms(raft::resources const& res); + + raft::device_vector_view sq_vmin() noexcept; + raft::device_vector_view sq_vmin() const noexcept; + + raft::device_vector_view sq_delta() noexcept; + raft::device_vector_view sq_delta() const noexcept; + + raft::host_vector_view accum_sorted_sizes() noexcept; + [[nodiscard]] raft::host_vector_view accum_sorted_sizes() const noexcept; + + raft::device_vector_view data_ptrs() noexcept; + raft::device_vector_view data_ptrs() const noexcept; + + raft::device_vector_view inds_ptrs() noexcept; + raft::device_vector_view inds_ptrs() const noexcept; + + std::vector>>& lists() noexcept; + const std::vector>>& lists() const noexcept; + + void check_consistency(); + + private: + cuvs::distance::DistanceType metric_; + bool adaptive_centers_; + bool conservative_memory_allocation_; + + std::vector>> lists_; + raft::device_vector list_sizes_; + raft::device_matrix centers_; + std::optional> center_norms_; + raft::device_vector sq_vmin_; + raft::device_vector sq_delta_; + + raft::device_vector data_ptrs_; + raft::device_vector inds_ptrs_; + raft::host_vector accum_sorted_sizes_; +}; + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_index_build IVF-SQ index build + * @{ + */ + +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::ivf_sq::index; + +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::device_matrix_view dataset, + cuvs::neighbors::ivf_sq::index& idx); + +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::ivf_sq::index; + +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::device_matrix_view dataset, + cuvs::neighbors::ivf_sq::index& idx); + +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::ivf_sq::index; + +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::host_matrix_view dataset, + cuvs::neighbors::ivf_sq::index& idx); + +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::ivf_sq::index; + +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::index_params& index_params, + raft::host_matrix_view dataset, + cuvs::neighbors::ivf_sq::index& idx); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_index_extend IVF-SQ index extend + * @{ + */ + +auto extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_sq::index& orig_index) + -> cuvs::neighbors::ivf_sq::index; + +void extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_sq::index* idx); + +auto extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_sq::index& orig_index) + -> cuvs::neighbors::ivf_sq::index; + +void extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_sq::index* idx); + +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_sq::index& orig_index) + -> cuvs::neighbors::ivf_sq::index; + +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_sq::index* idx); + +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_sq::index& orig_index) + -> cuvs::neighbors::ivf_sq::index; + +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_sq::index* idx); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_index_search IVF-SQ index search + * @{ + */ + +void search(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::search_params& params, + const cuvs::neighbors::ivf_sq::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +void search(raft::resources const& handle, + const cuvs::neighbors::ivf_sq::search_params& params, + const cuvs::neighbors::ivf_sq::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + +/** + * @} + */ + +/** + * @defgroup ivf_sq_cpp_index_serialize IVF-SQ index serialize + * @{ + */ + +void serialize(raft::resources const& handle, + const std::string& filename, + const cuvs::neighbors::ivf_sq::index& index); + +void deserialize(raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::ivf_sq::index* index); + +/** + * @} + */ + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh b/cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh new file mode 100644 index 0000000000..6c46a20e65 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh @@ -0,0 +1,664 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../../core/nvtx.hpp" +#include "../ivf_common.cuh" +#include "../ivf_list.cuh" + +#include +#include +#include + +#include "../../cluster/kmeans_balanced.cuh" +#include "../detail/ann_utils.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +namespace cuvs::neighbors::ivf_sq { +using namespace cuvs::spatial::knn::detail; // NOLINT + +namespace detail { + +struct ColMinMaxPair { + float min_val; + float max_val; +}; + +struct ColMinMaxOp { + __device__ __forceinline__ ColMinMaxPair operator()(const ColMinMaxPair& a, + const ColMinMaxPair& b) const + { + return {fminf(a.min_val, b.min_val), fmaxf(a.max_val, b.max_val)}; + } +}; + +/** + * Fused per-column min+max in a single pass (2x less DRAM traffic than two + * separate reductions). One thread block per column; threads stride over + * rows and feed CUB BlockReduce with a combined min/max pair. + * + * Row-loop is manually 4x-unrolled so the compiler can overlap four + * independent __ldg requests in the memory pipeline. + */ +template +__launch_bounds__(BlockSize) RAFT_KERNEL fused_column_minmax_kernel(const float* __restrict__ data, + float* __restrict__ col_min, + float* __restrict__ col_max, + int64_t n_rows, + uint32_t dim) +{ + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + + const uint32_t col = blockIdx.x; + if (col >= dim) return; + + ColMinMaxPair agg = {std::numeric_limits::max(), std::numeric_limits::lowest()}; + + const int64_t stride = static_cast(BlockSize); + int64_t row = static_cast(threadIdx.x); + + for (; row + 3 * stride < n_rows; row += 4 * stride) { + float v0 = __ldg(&data[row * dim + col]); + float v1 = __ldg(&data[(row + stride) * dim + col]); + float v2 = __ldg(&data[(row + 2 * stride) * dim + col]); + float v3 = __ldg(&data[(row + 3 * stride) * dim + col]); + agg.min_val = fminf(agg.min_val, fminf(fminf(v0, v1), fminf(v2, v3))); + agg.max_val = fmaxf(agg.max_val, fmaxf(fmaxf(v0, v1), fmaxf(v2, v3))); + } + for (; row < n_rows; row += stride) { + float val = __ldg(&data[row * dim + col]); + agg.min_val = fminf(agg.min_val, val); + agg.max_val = fmaxf(agg.max_val, val); + } + + agg = BlockReduce(temp_storage).Reduce(agg, ColMinMaxOp()); + + if (threadIdx.x == 0) { + col_min[col] = agg.min_val; + col_max[col] = agg.max_val; + } +} + +template +auto clone(const raft::resources& res, const index& source) -> index +{ + auto stream = raft::resource::get_cuda_stream(res); + + index target(res, + source.metric(), + source.n_lists(), + source.dim(), + source.adaptive_centers(), + source.conservative_memory_allocation()); + + raft::copy(target.list_sizes().data_handle(), + source.list_sizes().data_handle(), + source.list_sizes().size(), + stream); + raft::copy(target.centers().data_handle(), + source.centers().data_handle(), + source.centers().size(), + stream); + if (source.center_norms().has_value()) { + target.allocate_center_norms(res); + raft::copy(target.center_norms()->data_handle(), + source.center_norms()->data_handle(), + source.center_norms()->size(), + stream); + } + raft::copy(target.sq_vmin().data_handle(), + source.sq_vmin().data_handle(), + source.sq_vmin().size(), + stream); + raft::copy(target.sq_delta().data_handle(), + source.sq_delta().data_handle(), + source.sq_delta().size(), + stream); + target.lists() = source.lists(); + ivf::detail::recompute_internal_state(res, target); + return target; +} + +/** + * Kernel to encode float residuals to uint8_t SQ codes and write them interleaved. + * + * Uses warp-per-vector parallelism: each warp cooperatively encodes one vector + * so that reads from residuals/vmin/delta are coalesced across the 32 lanes. + * Lane 0 handles the atomic position assignment and the index write. + */ +template +__launch_bounds__(BlockSize) RAFT_KERNEL encode_and_fill_kernel(const uint32_t* labels, + const float* residuals, + const int64_t* source_ixs, + uint8_t** list_data_ptrs, + int64_t** list_index_ptrs, + uint32_t* list_sizes_ptr, + const float* vmin, + const float* delta, + int64_t n_rows, + uint32_t dim, + int64_t batch_offset) +{ + constexpr uint32_t kWarpSize = kIndexGroupSize; + constexpr uint32_t kWarpsPerBlock = BlockSize / kWarpSize; + + const uint32_t lane_id = threadIdx.x % kWarpSize; + const int64_t row_id = + int64_t(threadIdx.x / kWarpSize) + int64_t(blockIdx.x) * int64_t(kWarpsPerBlock); + if (row_id >= n_rows) return; + + uint32_t list_id = 0; + uint32_t inlist_id = 0; + if (lane_id == 0) { + auto source_ix = source_ixs == nullptr ? row_id + batch_offset : source_ixs[row_id]; + list_id = labels[row_id]; + inlist_id = atomicAdd(list_sizes_ptr + list_id, 1); + list_index_ptrs[list_id][inlist_id] = source_ix; + } + list_id = __shfl_sync(0xFFFFFFFF, list_id, 0); + inlist_id = __shfl_sync(0xFFFFFFFF, inlist_id, 0); + + using interleaved_group = raft::Pow2; + auto group_offset = interleaved_group::roundDown(inlist_id); + auto ingroup_id = interleaved_group::mod(inlist_id); + + constexpr uint32_t veclen = list_spec::kVecLen; + uint32_t padded_dim = ((dim + veclen - 1) / veclen) * veclen; + auto* list_dat = list_data_ptrs[list_id] + static_cast(group_offset) * padded_dim; + const float* src = residuals + row_id * dim; + + for (uint32_t d = lane_id; d < padded_dim; d += kWarpSize) { + uint8_t out; + if (d < dim) { + float val = src[d]; + float dv = delta[d]; + float v = vmin[d]; + float code = (dv > 0.0f) ? roundf((val - v) / dv) : 0.0f; + out = static_cast(fminf(fmaxf(code, 0.0f), 255.0f)); + } else { + out = 0; + } + uint32_t l = (d / veclen) * veclen; + uint32_t j = d % veclen; + list_dat[l * kIndexGroupSize + ingroup_id * veclen + j] = out; + } +} + +/** + * Compute residuals: residual[i] = cast(x_i) - centers[labels[i]] + */ +template +RAFT_KERNEL compute_residuals_kernel(const T* dataset, + const float* centers, + const uint32_t* labels, + float* residuals, + int64_t n_rows, + uint32_t dim) +{ + int64_t i = int64_t(blockIdx.x) * blockDim.x + threadIdx.x; + uint32_t j = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= n_rows || j >= dim) return; + + float val = utils::mapping{}(dataset[i * dim + j]); + uint32_t c = labels[i]; + residuals[i * dim + j] = val - centers[c * dim + j]; +} + +template +void extend(raft::resources const& handle, + index* index, + const T* new_vectors, + const int64_t* new_indices, + int64_t n_rows) +{ + using LabelT = uint32_t; + RAFT_EXPECTS(index != nullptr, "index cannot be empty."); + if (n_rows == 0) return; + + auto stream = raft::resource::get_cuda_stream(handle); + auto n_lists = index->n_lists(); + auto dim = index->dim(); + list_spec list_device_spec{index->dim(), + index->conservative_memory_allocation()}; + cuvs::common::nvtx::range fun_scope( + "ivf_sq::extend(%zu, %u)", size_t(n_rows), dim); + + RAFT_EXPECTS(new_indices != nullptr || index->size() == 0, + "You must pass data indices when the index is non-empty."); + + auto new_labels = + raft::make_device_mdarray(handle, + raft::resource::get_large_workspace_resource(handle), + raft::make_extents(n_rows)); + cuvs::cluster::kmeans::balanced_params kmeans_params; + kmeans_params.metric = index->metric(); + auto orig_centroids_view = raft::make_device_matrix_view( + index->centers().data_handle(), n_lists, dim); + + constexpr size_t kReasonableMaxBatchSize = 65536; + size_t max_batch_size = std::min(n_rows, kReasonableMaxBatchSize); + + auto copy_stream = raft::resource::get_cuda_stream(handle); + bool enable_prefetch = false; + if (handle.has_resource_factory(raft::resource::resource_type::CUDA_STREAM_POOL)) { + if (raft::resource::get_stream_pool_size(handle) >= 1) { + enable_prefetch = true; + copy_stream = raft::resource::get_stream_from_stream_pool(handle); + } + } + + utils::batch_load_iterator vec_batches(new_vectors, + n_rows, + index->dim(), + max_batch_size, + copy_stream, + raft::resource::get_workspace_resource(handle), + enable_prefetch); + vec_batches.prefetch_next_batch(); + + for (const auto& batch : vec_batches) { + auto batch_data_view = + raft::make_device_matrix_view(batch.data(), batch.size(), index->dim()); + auto batch_labels_view = raft::make_device_vector_view( + new_labels.data_handle() + batch.offset(), batch.size()); + cuvs::cluster::kmeans::predict( + handle, kmeans_params, batch_data_view, orig_centroids_view, batch_labels_view); + vec_batches.prefetch_next_batch(); + raft::resource::sync_stream(handle); + } + + auto* list_sizes_ptr = index->list_sizes().data_handle(); + auto old_list_sizes_dev = raft::make_device_mdarray( + handle, raft::resource::get_workspace_resource(handle), raft::make_extents(n_lists)); + raft::copy(old_list_sizes_dev.data_handle(), list_sizes_ptr, n_lists, stream); + + if (index->adaptive_centers()) { + auto centroids_view = raft::make_device_matrix_view( + index->centers().data_handle(), index->centers().extent(0), index->centers().extent(1)); + auto list_sizes_view = + raft::make_device_vector_view, int64_t>( + list_sizes_ptr, n_lists); + for (const auto& batch : vec_batches) { + auto batch_data_view = + raft::make_device_matrix_view(batch.data(), batch.size(), index->dim()); + auto batch_labels_view = raft::make_device_vector_view( + new_labels.data_handle() + batch.offset(), batch.size()); + cuvs::cluster::kmeans_balanced::helpers::calc_centers_and_sizes(handle, + batch_data_view, + batch_labels_view, + centroids_view, + list_sizes_view, + false, + utils::mapping{}); + } + } else { + raft::stats::histogram(raft::stats::HistTypeAuto, + reinterpret_cast(list_sizes_ptr), + int64_t(n_lists), + new_labels.data_handle(), + n_rows, + 1, + stream); + raft::linalg::add( + list_sizes_ptr, list_sizes_ptr, old_list_sizes_dev.data_handle(), n_lists, stream); + } + + std::vector new_list_sizes(n_lists); + std::vector old_list_sizes(n_lists); + { + raft::copy(old_list_sizes.data(), old_list_sizes_dev.data_handle(), n_lists, stream); + raft::copy(new_list_sizes.data(), list_sizes_ptr, n_lists, stream); + raft::resource::sync_stream(handle); + auto& lists = index->lists(); + for (uint32_t label = 0; label < n_lists; label++) { + ivf::resize_list(handle, + lists[label], + list_device_spec, + new_list_sizes[label], + raft::Pow2::roundUp(old_list_sizes[label])); + } + } + ivf::detail::recompute_internal_state(handle, *index); + raft::copy(list_sizes_ptr, old_list_sizes_dev.data_handle(), n_lists, stream); + + utils::batch_load_iterator vec_indices( + new_indices, n_rows, 1, max_batch_size, stream, raft::resource::get_workspace_resource(handle)); + vec_batches.reset(); + vec_batches.prefetch_next_batch(); + utils::batch_load_iterator idx_batch = vec_indices.begin(); + + auto residuals_buf = raft::make_device_vector(handle, max_batch_size * dim); + + size_t next_report_offset = 0; + size_t d_report_offset = n_rows * 5 / 100; + + for (const auto& batch : vec_batches) { + int64_t bs = batch.size(); + + { + dim3 threads(32, 8); + dim3 blocks(raft::ceildiv(bs, threads.x), raft::ceildiv(dim, threads.y)); + compute_residuals_kernel + <<>>(batch.data(), + index->centers().data_handle(), + new_labels.data_handle() + batch.offset(), + residuals_buf.data_handle(), + bs, + dim); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + + { + constexpr int kEncodeBlockSize = 256; + constexpr int kEncodeWarpsPerBlk = kEncodeBlockSize / kIndexGroupSize; + const dim3 block_dim(kEncodeBlockSize); + const dim3 grid_dim(raft::ceildiv(bs, int64_t(kEncodeWarpsPerBlk))); + encode_and_fill_kernel + <<>>(new_labels.data_handle() + batch.offset(), + residuals_buf.data_handle(), + idx_batch->data(), + index->data_ptrs().data_handle(), + index->inds_ptrs().data_handle(), + list_sizes_ptr, + index->sq_vmin().data_handle(), + index->sq_delta().data_handle(), + bs, + dim, + batch.offset()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + + vec_batches.prefetch_next_batch(); + raft::resource::sync_stream(handle); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + if (batch.offset() > next_report_offset) { + float progress = batch.offset() * 100.0f / n_rows; + RAFT_LOG_DEBUG("ivf_sq::extend added vectors %zu, %6.1f%% complete", + static_cast(batch.offset()), + progress); + next_report_offset += d_report_offset; + } + ++idx_batch; + } + + auto compute_center_norms = [&]() { + if (index->metric() == cuvs::distance::DistanceType::CosineExpanded) { + raft::linalg::rowNorm(index->center_norms()->data_handle(), + index->centers().data_handle(), + dim, + n_lists, + stream, + raft::sqrt_op{}); + } else { + raft::linalg::rowNorm( + index->center_norms()->data_handle(), index->centers().data_handle(), dim, n_lists, stream); + } + }; + + if (!index->center_norms().has_value()) { + index->allocate_center_norms(handle); + if (index->center_norms().has_value()) { compute_center_norms(); } + } else if (index->adaptive_centers()) { + compute_center_norms(); + } +} + +template +auto extend(raft::resources const& handle, + const index& orig_index, + const T* new_vectors, + const int64_t* new_indices, + int64_t n_rows) -> index +{ + auto ext_index = clone(handle, orig_index); + detail::extend(handle, &ext_index, new_vectors, new_indices, n_rows); + return ext_index; +} + +template +inline auto build(raft::resources const& handle, + const index_params& params, + const T* dataset, + int64_t n_rows, + uint32_t dim) -> index +{ + auto stream = raft::resource::get_cuda_stream(handle); + cuvs::common::nvtx::range fun_scope( + "ivf_sq::build(%zu, %u)", size_t(n_rows), dim); + static_assert(std::is_same_v || std::is_same_v, "unsupported data type"); + RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset"); + RAFT_EXPECTS(n_rows >= params.n_lists, "number of rows can't be less than n_lists"); + RAFT_EXPECTS(params.metric != cuvs::distance::DistanceType::CosineExpanded || dim > 1, + "Cosine metric requires more than one dim"); + + index idx(handle, params, dim); + utils::memzero(idx.accum_sorted_sizes().data_handle(), idx.accum_sorted_sizes().size(), stream); + utils::memzero(idx.list_sizes().data_handle(), idx.list_sizes().size(), stream); + utils::memzero(idx.data_ptrs().data_handle(), idx.data_ptrs().size(), stream); + utils::memzero(idx.inds_ptrs().data_handle(), idx.inds_ptrs().size(), stream); + + // Train k-means centroids and SQ parameters on the same training subset. + // This mirrors IVF-PQ, which also trains its codebook on a subset of the data. + { + auto trainset_ratio = std::max( + 1, n_rows / std::max(params.kmeans_trainset_fraction * n_rows, idx.n_lists())); + auto n_rows_train = n_rows / trainset_ratio; + rmm::device_uvector trainset( + n_rows_train * idx.dim(), stream, raft::resource::get_large_workspace_resource(handle)); + RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset.data(), + sizeof(T) * idx.dim(), + dataset, + sizeof(T) * idx.dim() * trainset_ratio, + sizeof(T) * idx.dim(), + n_rows_train, + cudaMemcpyDefault, + stream)); + auto trainset_const_view = + raft::make_device_matrix_view(trainset.data(), n_rows_train, idx.dim()); + auto centers_view = raft::make_device_matrix_view( + idx.centers().data_handle(), idx.n_lists(), idx.dim()); + cuvs::cluster::kmeans::balanced_params kmeans_params; + kmeans_params.n_iters = params.kmeans_n_iters; + kmeans_params.metric = idx.metric(); + cuvs::cluster::kmeans::fit(handle, kmeans_params, trainset_const_view, centers_view); + raft::resource::sync_stream(handle); + + // Train SQ: predict labels for the training subset, compute its residuals, + // and derive per-dimension vmin/delta from them. + auto train_labels = raft::make_device_vector(handle, n_rows_train); + { + cuvs::cluster::kmeans::balanced_params pred_params; + pred_params.metric = idx.metric(); + auto centers_const_view = raft::make_device_matrix_view( + idx.centers().data_handle(), idx.n_lists(), dim); + cuvs::cluster::kmeans::predict( + handle, pred_params, trainset_const_view, centers_const_view, train_labels.view()); + raft::resource::sync_stream(handle); + } + + rmm::device_uvector residuals( + n_rows_train * dim, stream, raft::resource::get_large_workspace_resource(handle)); + { + dim3 threads(32, 8); + dim3 blocks(raft::ceildiv(n_rows_train, threads.x), + raft::ceildiv(dim, threads.y)); + compute_residuals_kernel<<>>(trainset.data(), + idx.centers().data_handle(), + train_labels.data_handle(), + residuals.data(), + n_rows_train, + dim); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + + { + auto vmax_buf = raft::make_device_vector(handle, dim); + auto* vmin_ptr = idx.sq_vmin().data_handle(); + auto* vmax_ptr = vmax_buf.data_handle(); + + constexpr int kMinMaxBlockSize = 256; + fused_column_minmax_kernel<<>>( + residuals.data(), vmin_ptr, vmax_ptr, n_rows_train, dim); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + // Expand the observed range by a small margin to reduce clipping on unseen data, + // since the SQ parameters are trained on a subset rather than the full dataset. + constexpr float kMargin = 0.05f; + auto* delta_ptr = idx.sq_delta().data_handle(); + raft::linalg::map_offset( + handle, idx.sq_vmin(), [vmin_ptr, vmax_ptr, delta_ptr, kMargin] __device__(uint32_t j) { + float range = vmax_ptr[j] - vmin_ptr[j]; + float margin = range * kMargin; + delta_ptr[j] = (range > 0.0f) ? (range + 2.0f * margin) / 255.0f : 1.0f; + return vmin_ptr[j] - margin; + }); + } + } + + if (params.add_data_on_build) { detail::extend(handle, &idx, dataset, nullptr, n_rows); } + + return idx; +} + +template +auto build(raft::resources const& handle, + const index_params& params, + raft::device_matrix_view dataset) -> index +{ + int64_t n_rows = dataset.extent(0); + uint32_t dim = dataset.extent(1); + return build(handle, params, dataset.data_handle(), n_rows, dim); +} + +template +auto build(raft::resources const& handle, + const index_params& params, + raft::host_matrix_view dataset) -> index +{ + int64_t n_rows = dataset.extent(0); + uint32_t dim = dataset.extent(1); + return build(handle, params, dataset.data_handle(), n_rows, dim); +} + +template +void build(raft::resources const& handle, + const index_params& params, + raft::device_matrix_view dataset, + index& idx) +{ + idx = build(handle, params, dataset); +} + +template +void build(raft::resources const& handle, + const index_params& params, + raft::host_matrix_view dataset, + index& idx) +{ + idx = build(handle, params, dataset); +} + +template +auto extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + const index& orig_index) -> index +{ + RAFT_EXPECTS(new_vectors.extent(1) == orig_index.dim(), + "new_vectors should have the same dimension as the index"); + if (new_indices.has_value()) { + RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0), + "new_vectors and new_indices have different number of rows"); + } + int64_t n_rows = new_vectors.extent(0); + return extend(handle, + orig_index, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + n_rows); +} + +template +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const index& orig_index) -> index +{ + RAFT_EXPECTS(new_vectors.extent(1) == orig_index.dim(), + "new_vectors should have the same dimension as the index"); + if (new_indices.has_value()) { + RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0), + "new_vectors and new_indices have different number of rows"); + } + int64_t n_rows = new_vectors.extent(0); + return extend(handle, + orig_index, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + n_rows); +} + +template +void extend(raft::resources const& handle, + raft::device_matrix_view new_vectors, + std::optional> new_indices, + index* idx) +{ + RAFT_EXPECTS(new_vectors.extent(1) == idx->dim(), + "new_vectors should have the same dimension as the index"); + if (new_indices.has_value()) { + RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0), + "new_vectors and new_indices have different number of rows"); + } + detail::extend(handle, + idx, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + new_vectors.extent(0)); +} + +template +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + index* idx) +{ + RAFT_EXPECTS(new_vectors.extent(1) == idx->dim(), + "new_vectors should have the same dimension as the index"); + if (new_indices.has_value()) { + RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0), + "new_vectors and new_indices have different number of rows"); + } + detail::extend(handle, + idx, + new_vectors.data_handle(), + new_indices.has_value() ? new_indices.value().data_handle() : nullptr, + new_vectors.extent(0)); +} + +} // namespace detail +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu new file mode 100644 index 0000000000..a97aebb11c --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu @@ -0,0 +1,89 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "ivf_sq_build.cuh" + +namespace cuvs::neighbors::ivf_sq { + +#define CUVS_INST_IVF_SQ_BUILD_EXTEND(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::device_matrix_view dataset) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_sq::index& idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset, idx); \ + } \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::host_matrix_view dataset) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_sq::index& idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset, idx); \ + } \ + \ + auto extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_sq::index& orig_index) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::extend( \ + handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_sq::index* idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_sq::index& orig_index) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::extend( \ + handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_sq::index* idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::extend(handle, new_vectors, new_indices, idx); \ + } + +CUVS_INST_IVF_SQ_BUILD_EXTEND(float, uint8_t); + +#undef CUVS_INST_IVF_SQ_BUILD_EXTEND + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu new file mode 100644 index 0000000000..9148e5c328 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu @@ -0,0 +1,89 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "ivf_sq_build.cuh" + +namespace cuvs::neighbors::ivf_sq { + +#define CUVS_INST_IVF_SQ_BUILD_EXTEND(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::device_matrix_view dataset) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_sq::index& idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset, idx); \ + } \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::host_matrix_view dataset) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset))); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_sq::index& idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::build(handle, params, dataset, idx); \ + } \ + \ + auto extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_sq::index& orig_index) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::extend( \ + handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_sq::index* idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_sq::index& orig_index) \ + -> cuvs::neighbors::ivf_sq::index \ + { \ + return cuvs::neighbors::ivf_sq::index( \ + std::move(cuvs::neighbors::ivf_sq::detail::extend( \ + handle, new_vectors, new_indices, orig_index))); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_sq::index* idx) \ + { \ + cuvs::neighbors::ivf_sq::detail::extend(handle, new_vectors, new_indices, idx); \ + } + +CUVS_INST_IVF_SQ_BUILD_EXTEND(half, uint8_t); + +#undef CUVS_INST_IVF_SQ_BUILD_EXTEND + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh b/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh new file mode 100644 index 0000000000..a17992ff19 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh @@ -0,0 +1,549 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../../core/nvtx.hpp" +#include "../detail/ann_utils.cuh" +#include "../ivf_common.cuh" +#include "../sample_filter.cuh" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace cuvs::neighbors::ivf_sq::detail { + +using namespace cuvs::spatial::knn::detail; // NOLINT + +enum class SqScanMetric { kL2, kIP, kCosine }; + +/** + * Per-probe scan kernel for IVF-SQ search. + * + * Grid: (n_queries, n_probes). Each block handles one (query, probe) pair. + * Within a block, each warp processes one interleaved group of kIndexGroupSize + * (=32) vectors at a time, with each lane responsible for one vector. + * Dimension blocks of veclen=16 bytes are loaded as coalesced uint4 reads + * across the warp (32 lanes x 16 bytes = 512 bytes = 4 cache lines), giving + * full memory-bandwidth utilisation. + * + * Per-dimension constants that are invariant across rows are precomputed into + * shared memory so the hot loop only reads from smem + one uint4 per dim-block: + * + * L2 / L2Sqrt: + * s_query_term[d] = query[d] - centroid[d] - sq_vmin[d] + * dist += (s_query_term[d] - code * s_sq_scale[d])^2 + * + * InnerProduct / Cosine: + * s_query_term[d] = query[d] + * s_recon_base[d] = centroid[d] + sq_vmin[d] + * v_d = s_recon_base[d] + code * s_sq_scale[d] + * dist += s_query_term[d] * v_d + * + * Shared-memory layout adapts to the metric to avoid waste: + * L2 / L2Sqrt : [s_query_term | s_sq_scale] (2 * dim floats) + * InnerProduct/Cosine: [s_query_term | s_recon_base | s_sq_scale] (3 * dim floats) + */ +template +__launch_bounds__(BlockDim) RAFT_KERNEL ivf_sq_scan_kernel(const uint8_t* const* data_ptrs, + const uint32_t* list_sizes, + const uint32_t* coarse_indices, + const float* queries_float, + const float* centers, + const float* sq_vmin, + const float* sq_delta, + const float* query_norms, + uint32_t n_probes, + uint32_t dim, + uint32_t max_samples, + const uint32_t* chunk_indices, + float* out_distances, + uint32_t* out_indices, + IvfSampleFilterT sample_filter) +{ + static_assert(kIndexGroupSize == raft::WarpSize, + "Warp-coalesced scan requires kIndexGroupSize == WarpSize"); + + extern __shared__ float smem[]; + + constexpr bool kIsL2 = (Metric == SqScanMetric::kL2); + constexpr bool kIsCosine = (Metric == SqScanMetric::kCosine); + + float* s_query_term = smem; + float* s_recon_base = smem + dim; + float* s_sq_scale = kIsL2 ? (smem + dim) : (smem + 2 * dim); + + const uint32_t query_ix = blockIdx.x; + const uint32_t probe_ix = blockIdx.y; + + const uint32_t* my_coarse = coarse_indices + query_ix * n_probes; + const uint32_t cluster_id = my_coarse[probe_ix]; + const uint32_t cluster_sz = list_sizes[cluster_id]; + if (cluster_sz == 0) return; + + const uint8_t* codes = data_ptrs[cluster_id]; + const float* query = queries_float + query_ix * dim; + const float* centroid = centers + cluster_id * dim; + + for (uint32_t d = threadIdx.x; d < dim; d += BlockDim) { + float vmin_d = sq_vmin[d]; + s_sq_scale[d] = sq_delta[d]; + if constexpr (kIsL2) { + s_query_term[d] = query[d] - centroid[d] - vmin_d; + } else { + s_query_term[d] = query[d]; + s_recon_base[d] = centroid[d] + vmin_d; + } + } + __syncthreads(); + + const uint32_t* my_chunk = chunk_indices + query_ix * n_probes; + uint32_t out_base = (probe_ix > 0) ? my_chunk[probe_ix - 1] : 0; + + constexpr uint32_t veclen = 16; + constexpr uint32_t kWarpsPerBlock = BlockDim / raft::WarpSize; + const uint32_t warp_id = threadIdx.x / raft::WarpSize; + const uint32_t lane_id = threadIdx.x % raft::WarpSize; + + uint32_t padded_dim = ((dim + veclen - 1) / veclen) * veclen; + uint32_t n_dim_blocks = padded_dim / veclen; + + for (uint32_t group = warp_id * kIndexGroupSize; group < cluster_sz; + group += kWarpsPerBlock * kIndexGroupSize) { + const uint32_t row = group + lane_id; + const bool valid = (row < cluster_sz) && sample_filter(query_ix, cluster_id, row); + + float dist = 0.0f; + float v_norm_sq = 0.0f; + + const uint8_t* group_data = codes + size_t(group) * padded_dim; + + for (uint32_t bl = 0; bl < n_dim_blocks; bl++) { + uint8_t codes_local[veclen]; + *reinterpret_cast(codes_local) = *reinterpret_cast( + group_data + bl * (veclen * kIndexGroupSize) + lane_id * veclen); + + const uint32_t l = bl * veclen; +#pragma unroll + for (uint32_t j = 0; j < veclen; j++) { + if (l + j < dim) { + float recon = float(codes_local[j]) * s_sq_scale[l + j]; + + if constexpr (kIsL2) { + float diff = s_query_term[l + j] - recon; + dist += diff * diff; + } else { + float v_d = s_recon_base[l + j] + recon; + dist += s_query_term[l + j] * v_d; + if constexpr (kIsCosine) { v_norm_sq += v_d * v_d; } + } + } + } + } + + if constexpr (kIsCosine) { + float denom = query_norms[query_ix] * sqrtf(v_norm_sq); + dist = (denom > 0.0f) ? 1.0f - dist / denom : 0.0f; + } + + if (valid) { + uint32_t out_idx = query_ix * max_samples + out_base + row; + out_distances[out_idx] = dist; + out_indices[out_idx] = out_base + row; + } + } +} + +template +void ivf_sq_scan(raft::resources const& handle, + const index& idx, + const float* queries_float, + const float* query_norms, + uint32_t n_queries, + uint32_t n_probes, + uint32_t max_samples, + const uint32_t* coarse_indices, + const uint32_t* chunk_indices, + float* out_distances, + uint32_t* out_indices, + IvfSampleFilterT sample_filter, + rmm::cuda_stream_view stream) +{ + constexpr int kThreads = 256; + dim3 grid(n_queries, n_probes); + dim3 block(kThreads); + uint32_t dim = idx.dim(); + + auto do_launch = [&](auto kernel_ptr, size_t smem) { + RAFT_CUDA_TRY( + cudaFuncSetAttribute(kernel_ptr, cudaFuncAttributeMaxDynamicSharedMemorySize, smem)); + kernel_ptr<<>>(idx.data_ptrs().data_handle(), + idx.list_sizes().data_handle(), + coarse_indices, + queries_float, + idx.centers().data_handle(), + idx.sq_vmin().data_handle(), + idx.sq_delta().data_handle(), + query_norms, + n_probes, + dim, + max_samples, + chunk_indices, + out_distances, + out_indices, + sample_filter); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + }; + + switch (idx.metric()) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2SqrtExpanded: + do_launch(ivf_sq_scan_kernel, + 2 * dim * sizeof(float)); + break; + case cuvs::distance::DistanceType::InnerProduct: + do_launch(ivf_sq_scan_kernel, + 3 * dim * sizeof(float)); + break; + case cuvs::distance::DistanceType::CosineExpanded: + do_launch(ivf_sq_scan_kernel, + 3 * dim * sizeof(float)); + break; + default: RAFT_FAIL("Unsupported metric type for IVF-SQ scan."); + } +} + +template +void search_impl(raft::resources const& handle, + const index& index, + const T* queries, + uint32_t n_queries, + uint32_t k, + uint32_t n_probes, + bool select_min, + int64_t* neighbors, + float* distances, + rmm::device_async_resource_ref search_mr, + IvfSampleFilterT sample_filter) +{ + auto stream = raft::resource::get_cuda_stream(handle); + auto dim = index.dim(); + + std::size_t n_queries_probes = std::size_t(n_queries) * std::size_t(n_probes); + + rmm::device_uvector query_norm_dev(n_queries, stream, search_mr); + rmm::device_uvector distance_buffer_dev(n_queries * index.n_lists(), stream, search_mr); + rmm::device_uvector coarse_distances_dev(n_queries_probes, stream, search_mr); + rmm::device_uvector coarse_indices_dev(n_queries_probes, stream, search_mr); + + size_t float_query_size; + if constexpr (std::is_same_v) { + float_query_size = 0; + } else { + float_query_size = n_queries * dim; + } + rmm::device_uvector converted_queries_dev(float_query_size, stream, search_mr); + float* converted_queries_ptr = converted_queries_dev.data(); + + if constexpr (std::is_same_v) { + converted_queries_ptr = const_cast(queries); + } else { + raft::linalg::unaryOp( + converted_queries_ptr, queries, n_queries * dim, utils::mapping{}, stream); + } + + auto distance_buffer_dev_view = raft::make_device_matrix_view( + distance_buffer_dev.data(), n_queries, index.n_lists()); + + RAFT_EXPECTS(index.metric() == cuvs::distance::DistanceType::InnerProduct || + index.center_norms().has_value(), + "Center norms are required for search with L2 or Cosine metric. " + "Rebuild the index with add_data_on_build=true or call extend() first."); + + float alpha = 1.0f; + float beta = 0.0f; + switch (index.metric()) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2SqrtExpanded: { + alpha = -2.0f; + beta = 1.0f; + raft::linalg::rowNorm(query_norm_dev.data(), + converted_queries_ptr, + static_cast(dim), + static_cast(n_queries), + stream); + utils::outer_add(query_norm_dev.data(), + (int64_t)n_queries, + index.center_norms()->data_handle(), + (int64_t)index.n_lists(), + distance_buffer_dev.data(), + stream); + break; + } + case cuvs::distance::DistanceType::CosineExpanded: { + raft::linalg::rowNorm(query_norm_dev.data(), + converted_queries_ptr, + static_cast(dim), + static_cast(n_queries), + stream, + raft::sqrt_op{}); + alpha = -1.0f; + beta = 0.0f; + break; + } + case cuvs::distance::DistanceType::InnerProduct: { + alpha = 1.0f; + beta = 0.0f; + break; + } + default: RAFT_FAIL("Unsupported metric type for IVF-SQ search."); + } + + raft::linalg::gemm(handle, + true, + false, + index.n_lists(), + n_queries, + dim, + &alpha, + index.centers().data_handle(), + dim, + converted_queries_ptr, + dim, + &beta, + distance_buffer_dev.data(), + index.n_lists(), + stream); + + if (index.metric() == cuvs::distance::DistanceType::CosineExpanded) { + auto n_lists_local = index.n_lists(); + const auto* q_norm_ptr = query_norm_dev.data(); + const auto* center_norm_ptr = index.center_norms()->data_handle(); + raft::linalg::map_offset( + handle, + distance_buffer_dev_view, + [=] __device__(const uint32_t idx, const float dist) { + const auto query = idx / n_lists_local; + const auto cluster = idx % n_lists_local; + float denom = q_norm_ptr[query] * center_norm_ptr[cluster]; + return (denom > 0.0f) ? dist / denom : 0.0f; + }, + raft::make_const_mdspan(distance_buffer_dev_view)); + } + + cuvs::selection::select_k( + handle, + raft::make_const_mdspan(distance_buffer_dev_view), + std::nullopt, + raft::make_device_matrix_view(coarse_distances_dev.data(), n_queries, n_probes), + raft::make_device_matrix_view( + coarse_indices_dev.data(), n_queries, n_probes), + select_min); + + rmm::device_uvector num_samples(n_queries, stream, search_mr); + rmm::device_uvector chunk_index(n_queries_probes, stream, search_mr); + + ivf::detail::calc_chunk_indices::configure(n_probes, n_queries)(index.list_sizes().data_handle(), + coarse_indices_dev.data(), + chunk_index.data(), + num_samples.data(), + stream); + + uint32_t max_samples = + std::max(static_cast(index.accum_sorted_sizes()(n_probes)), k); + + rmm::device_uvector all_distances(std::size_t(n_queries) * max_samples, stream, search_mr); + rmm::device_uvector all_indices( + std::size_t(n_queries) * max_samples, stream, search_mr); + + float init_val = + select_min ? std::numeric_limits::max() : std::numeric_limits::lowest(); + thrust::fill_n(raft::resource::get_thrust_policy(handle), + all_distances.data(), + std::size_t(n_queries) * max_samples, + init_val); + thrust::fill_n(raft::resource::get_thrust_policy(handle), + all_indices.data(), + std::size_t(n_queries) * max_samples, + uint32_t(0xFFFFFFFF)); + + auto filter_adapter = cuvs::neighbors::filtering::ivf_to_sample_filter( + index.inds_ptrs().data_handle(), sample_filter); + + ivf_sq_scan(handle, + index, + converted_queries_ptr, + query_norm_dev.data(), + n_queries, + n_probes, + max_samples, + coarse_indices_dev.data(), + chunk_index.data(), + all_distances.data(), + all_indices.data(), + filter_adapter, + stream); + + rmm::device_uvector neighbors_uint32(0, stream, search_mr); + uint32_t* neighbors_uint32_ptr = nullptr; + if constexpr (sizeof(int64_t) == sizeof(uint32_t)) { + neighbors_uint32_ptr = reinterpret_cast(neighbors); + } else { + neighbors_uint32.resize(std::size_t(n_queries) * k, stream); + neighbors_uint32_ptr = neighbors_uint32.data(); + } + + auto num_samples_view = + raft::make_device_vector_view(num_samples.data(), n_queries); + + cuvs::selection::select_k( + handle, + raft::make_device_matrix_view( + all_distances.data(), n_queries, max_samples), + raft::make_device_matrix_view( + all_indices.data(), n_queries, max_samples), + raft::make_device_matrix_view(distances, n_queries, k), + raft::make_device_matrix_view(neighbors_uint32_ptr, n_queries, k), + select_min, + false, + cuvs::selection::SelectAlgo::kAuto, + num_samples_view); + + ivf::detail::postprocess_distances( + handle, distances, distances, index.metric(), n_queries, k, 1.0, false); + + ivf::detail::postprocess_neighbors(neighbors, + neighbors_uint32_ptr, + index.inds_ptrs().data_handle(), + coarse_indices_dev.data(), + chunk_index.data(), + n_queries, + n_probes, + k, + stream); +} + +template +inline void search_with_filtering(raft::resources const& handle, + const search_params& params, + const index& index, + const T* queries, + uint32_t n_queries, + uint32_t k, + int64_t* neighbors, + float* distances, + IvfSampleFilterT sample_filter = IvfSampleFilterT()) +{ + cuvs::common::nvtx::range fun_scope( + "ivf_sq::search(k = %u, n_queries = %u, dim = %zu)", k, n_queries, index.dim()); + + RAFT_EXPECTS(params.n_probes > 0, + "n_probes (number of clusters to probe in the search) must be positive."); + auto n_probes = std::min(params.n_probes, index.n_lists()); + + uint32_t max_samples = + std::max(static_cast(index.accum_sorted_sizes()(n_probes)), k); + + constexpr uint64_t kExpectedWsSize = 1024ull * 1024 * 1024; + uint64_t max_ws_size = + std::min(raft::resource::get_workspace_free_bytes(handle), kExpectedWsSize); + + uint64_t converted_query_floats = std::is_same_v ? 0 : index.dim(); + uint64_t ws_per_query = sizeof(float) * (uint64_t(index.n_lists()) + n_probes + 1 + max_samples + + converted_query_floats) + + sizeof(uint32_t) * (uint64_t(n_probes) * 2 + 1 + max_samples + k); + + const uint32_t max_queries = + std::min(n_queries, std::max(1, max_ws_size / ws_per_query)); + + for (uint32_t offset_q = 0; offset_q < n_queries; offset_q += max_queries) { + uint32_t queries_batch = std::min(max_queries, n_queries - offset_q); + + search_impl(handle, + index, + queries + std::size_t(offset_q) * index.dim(), + queries_batch, + k, + n_probes, + cuvs::distance::is_min_close(index.metric()), + neighbors + std::size_t(offset_q) * k, + distances + std::size_t(offset_q) * k, + raft::resource::get_workspace_resource(handle), + sample_filter); + } +} + +template +void search_with_filtering(raft::resources const& handle, + const search_params& params, + const index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + IvfSampleFilterT sample_filter = IvfSampleFilterT()) +{ + RAFT_EXPECTS( + queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0), + "Number of rows in output neighbors and distances matrices must equal the number of queries."); + RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1), + "Number of columns in output neighbors and distances matrices must be equal"); + RAFT_EXPECTS(queries.extent(1) == index.dim(), + "Number of query dimensions should equal number of dimensions in the index."); + + search_with_filtering(handle, + params, + index, + queries.data_handle(), + static_cast(queries.extent(0)), + static_cast(neighbors.extent(1)), + neighbors.data_handle(), + distances.data_handle(), + sample_filter); +} + +template +void search(raft::resources const& handle, + const search_params& params, + const index& idx, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter_ref) +{ + try { + auto& sample_filter = + dynamic_cast(sample_filter_ref); + return search_with_filtering(handle, params, idx, queries, neighbors, distances, sample_filter); + } catch (const std::bad_cast&) { + } + + try { + auto& sample_filter = + dynamic_cast&>( + sample_filter_ref); + return search_with_filtering(handle, params, idx, queries, neighbors, distances, sample_filter); + } catch (const std::bad_cast&) { + RAFT_FAIL("Unsupported sample filter type"); + } +} + +} // namespace cuvs::neighbors::ivf_sq::detail diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu new file mode 100644 index 0000000000..60d95a153f --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "ivf_sq_search.cuh" + +namespace cuvs::neighbors::ivf_sq { + +#define CUVS_INST_IVF_SQ_SEARCH(T, IdxT) \ + void search(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::search_params& params, \ + const cuvs::neighbors::ivf_sq::index& index, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + cuvs::neighbors::ivf_sq::detail::search( \ + handle, params, index, queries, neighbors, distances, sample_filter); \ + } + +CUVS_INST_IVF_SQ_SEARCH(float, uint8_t); + +#undef CUVS_INST_IVF_SQ_SEARCH + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu new file mode 100644 index 0000000000..fbed3fd432 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu @@ -0,0 +1,29 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "ivf_sq_search.cuh" + +namespace cuvs::neighbors::ivf_sq { + +#define CUVS_INST_IVF_SQ_SEARCH(T, IdxT) \ + void search(raft::resources const& handle, \ + const cuvs::neighbors::ivf_sq::search_params& params, \ + const cuvs::neighbors::ivf_sq::index& index, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + cuvs::neighbors::ivf_sq::detail::search( \ + handle, params, index, queries, neighbors, distances, sample_filter); \ + } + +CUVS_INST_IVF_SQ_SEARCH(half, uint8_t); + +#undef CUVS_INST_IVF_SQ_SEARCH + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh new file mode 100644 index 0000000000..b95e63ee33 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh @@ -0,0 +1,161 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "../ivf_common.cuh" +#include "../ivf_list.cuh" +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace cuvs::neighbors::ivf_sq::detail { + +constexpr int serialization_version = 1; + +template +void serialize(raft::resources const& handle, std::ostream& os, const index& index_) +{ + RAFT_LOG_DEBUG( + "Saving IVF-SQ index, size %zu, dim %u", static_cast(index_.size()), index_.dim()); + + std::string dtype_string = raft::detail::numpy_serializer::get_numpy_dtype().to_string(); + dtype_string.resize(4); + os << dtype_string; + + serialize_scalar(handle, os, serialization_version); + serialize_scalar(handle, os, index_.size()); + serialize_scalar(handle, os, index_.dim()); + serialize_scalar(handle, os, index_.n_lists()); + serialize_scalar(handle, os, index_.metric()); + serialize_scalar(handle, os, index_.adaptive_centers()); + serialize_scalar(handle, os, index_.conservative_memory_allocation()); + serialize_mdspan(handle, os, index_.centers()); + + if (index_.center_norms()) { + bool has_norms = true; + serialize_scalar(handle, os, has_norms); + serialize_mdspan(handle, os, *index_.center_norms()); + } else { + bool has_norms = false; + serialize_scalar(handle, os, has_norms); + } + + serialize_mdspan(handle, os, index_.sq_vmin()); + serialize_mdspan(handle, os, index_.sq_delta()); + + auto sizes_host = raft::make_host_vector(index_.list_sizes().extent(0)); + raft::copy(sizes_host.data_handle(), + index_.list_sizes().data_handle(), + sizes_host.size(), + raft::resource::get_cuda_stream(handle)); + raft::resource::sync_stream(handle); + serialize_mdspan(handle, os, sizes_host.view()); + + list_spec list_store_spec{index_.dim(), true}; + for (uint32_t label = 0; label < index_.n_lists(); label++) { + ivf::serialize_list(handle, + os, + index_.lists()[label], + list_store_spec, + raft::Pow2::roundUp(sizes_host(label))); + } + raft::resource::sync_stream(handle); +} + +template +void serialize(raft::resources const& handle, + const std::string& filename, + const index& index_) +{ + std::ofstream of(filename, std::ios::out | std::ios::binary); + if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + detail::serialize(handle, of, index_); + of.close(); + if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); } +} + +template +auto deserialize(raft::resources const& handle, std::istream& is) -> index +{ + char dtype_string[4]; + is.read(dtype_string, 4); + + auto ver = raft::deserialize_scalar(handle, is); + if (ver != serialization_version) { + RAFT_FAIL("serialization version mismatch, expected %d, got %d ", serialization_version, ver); + } + auto n_rows = raft::deserialize_scalar(handle, is); + auto dim = raft::deserialize_scalar(handle, is); + auto n_lists = raft::deserialize_scalar(handle, is); + auto metric = raft::deserialize_scalar(handle, is); + bool adaptive_centers = raft::deserialize_scalar(handle, is); + bool cma = raft::deserialize_scalar(handle, is); + + index index_ = index(handle, metric, n_lists, dim, adaptive_centers, cma); + + deserialize_mdspan(handle, is, index_.centers()); + + bool has_norms = raft::deserialize_scalar(handle, is); + if (has_norms) { + index_.allocate_center_norms(handle); + if (!index_.center_norms()) { + RAFT_FAIL("Error inconsistent center norms"); + } else { + auto center_norms = index_.center_norms().value(); + deserialize_mdspan(handle, is, center_norms); + } + } + + deserialize_mdspan(handle, is, index_.sq_vmin()); + deserialize_mdspan(handle, is, index_.sq_delta()); + + deserialize_mdspan(handle, is, index_.list_sizes()); + + list_spec list_device_spec{index_.dim(), cma}; + list_spec list_store_spec{index_.dim(), true}; + for (uint32_t label = 0; label < index_.n_lists(); label++) { + ivf::deserialize_list(handle, is, index_.lists()[label], list_store_spec, list_device_spec); + } + raft::resource::sync_stream(handle); + + ivf::detail::recompute_internal_state(handle, index_); + + return index_; +} + +template +auto deserialize(raft::resources const& handle, const std::string& filename) -> index +{ + std::ifstream is(filename, std::ios::in | std::ios::binary); + if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + auto index = detail::deserialize(handle, is); + is.close(); + return index; +} + +} // namespace cuvs::neighbors::ivf_sq::detail + +#define CUVS_INST_IVF_SQ_SERIALIZE(IdxT) \ + void serialize(raft::resources const& handle, \ + const std::string& filename, \ + const cuvs::neighbors::ivf_sq::index& index) \ + { \ + cuvs::neighbors::ivf_sq::detail::serialize(handle, filename, index); \ + } \ + \ + void deserialize(raft::resources const& handle, \ + const std::string& filename, \ + cuvs::neighbors::ivf_sq::index* index) \ + { \ + *index = cuvs::neighbors::ivf_sq::detail::deserialize(handle, filename); \ + } diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu new file mode 100644 index 0000000000..c2351ed8c3 --- /dev/null +++ b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu @@ -0,0 +1,16 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "ivf_sq_serialize.cuh" + +namespace cuvs::neighbors::ivf_sq { + +CUVS_INST_IVF_SQ_SERIALIZE(uint8_t); + +#undef CUVS_INST_IVF_SQ_SERIALIZE + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/src/neighbors/ivf_sq_index.cpp b/cpp/src/neighbors/ivf_sq_index.cpp new file mode 100644 index 0000000000..d97ace7dcb --- /dev/null +++ b/cpp/src/neighbors/ivf_sq_index.cpp @@ -0,0 +1,236 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +namespace cuvs::neighbors::ivf_sq { + +template +index::index(raft::resources const& res) + : index(res, cuvs::distance::DistanceType::L2Expanded, 0, 0, false, false) +{ +} + +template +index::index(raft::resources const& res, const index_params& params, uint32_t dim) + : index(res, + params.metric, + params.n_lists, + dim, + params.adaptive_centers, + params.conservative_memory_allocation) +{ +} + +template +index::index(raft::resources const& res, + cuvs::distance::DistanceType metric, + uint32_t n_lists, + uint32_t dim, + bool adaptive_centers, + bool conservative_memory_allocation) + : cuvs::neighbors::index(), + metric_(metric), + adaptive_centers_(adaptive_centers), + conservative_memory_allocation_(conservative_memory_allocation), + lists_{n_lists}, + list_sizes_{raft::make_device_vector(res, n_lists)}, + centers_(raft::make_device_matrix(res, n_lists, dim)), + center_norms_(std::nullopt), + sq_vmin_{raft::make_device_vector(res, dim)}, + sq_delta_{raft::make_device_vector(res, dim)}, + data_ptrs_{raft::make_device_vector(res, n_lists)}, + inds_ptrs_{raft::make_device_vector(res, n_lists)}, + accum_sorted_sizes_{raft::make_host_vector(n_lists + 1)} +{ + check_consistency(); + accum_sorted_sizes_(n_lists) = 0; +} + +template +cuvs::distance::DistanceType index::metric() const noexcept +{ + return metric_; +} + +template +bool index::adaptive_centers() const noexcept +{ + return adaptive_centers_; +} + +template +int64_t index::size() const noexcept +{ + return accum_sorted_sizes()(n_lists()); +} + +template +uint32_t index::dim() const noexcept +{ + return centers_.extent(1); +} + +template +uint32_t index::n_lists() const noexcept +{ + return lists_.size(); +} + +template +bool index::conservative_memory_allocation() const noexcept +{ + return conservative_memory_allocation_; +} + +template +raft::device_vector_view index::list_sizes() noexcept +{ + return list_sizes_.view(); +} + +template +raft::device_vector_view index::list_sizes() const noexcept +{ + return list_sizes_.view(); +} + +template +raft::device_matrix_view index::centers() noexcept +{ + return centers_.view(); +} + +template +raft::device_matrix_view index::centers() + const noexcept +{ + return centers_.view(); +} + +template +std::optional> index::center_norms() noexcept +{ + if (center_norms_.has_value()) { + return std::make_optional>(center_norms_->view()); + } else { + return std::nullopt; + } +} + +template +std::optional> index::center_norms() + const noexcept +{ + if (center_norms_.has_value()) { + return std::make_optional>( + center_norms_->view()); + } else { + return std::nullopt; + } +} + +template +void index::allocate_center_norms(raft::resources const& res) +{ + switch (metric_) { + case cuvs::distance::DistanceType::L2Expanded: + case cuvs::distance::DistanceType::L2SqrtExpanded: + case cuvs::distance::DistanceType::L2Unexpanded: + case cuvs::distance::DistanceType::L2SqrtUnexpanded: + case cuvs::distance::DistanceType::CosineExpanded: + center_norms_ = raft::make_device_vector(res, n_lists()); + break; + default: center_norms_ = std::nullopt; + } +} + +template +raft::device_vector_view index::sq_vmin() noexcept +{ + return sq_vmin_.view(); +} + +template +raft::device_vector_view index::sq_vmin() const noexcept +{ + return sq_vmin_.view(); +} + +template +raft::device_vector_view index::sq_delta() noexcept +{ + return sq_delta_.view(); +} + +template +raft::device_vector_view index::sq_delta() const noexcept +{ + return sq_delta_.view(); +} + +template +raft::host_vector_view index::accum_sorted_sizes() noexcept +{ + return accum_sorted_sizes_.view(); +} + +template +raft::host_vector_view index::accum_sorted_sizes() const noexcept +{ + return accum_sorted_sizes_.view(); +} + +template +raft::device_vector_view index::data_ptrs() noexcept +{ + return data_ptrs_.view(); +} + +template +raft::device_vector_view index::data_ptrs() const noexcept +{ + return data_ptrs_.view(); +} + +template +raft::device_vector_view index::inds_ptrs() noexcept +{ + return inds_ptrs_.view(); +} + +template +raft::device_vector_view index::inds_ptrs() const noexcept +{ + return inds_ptrs_.view(); +} + +template +std::vector>>& index::lists() noexcept +{ + return lists_; +} + +template +const std::vector>>& index::lists() const noexcept +{ + return lists_; +} + +template +void index::check_consistency() +{ + auto n_lists = lists_.size(); + RAFT_EXPECTS(list_sizes_.extent(0) == n_lists, "inconsistent list size"); + RAFT_EXPECTS(data_ptrs_.extent(0) == n_lists, "inconsistent list size"); + RAFT_EXPECTS(inds_ptrs_.extent(0) == n_lists, "inconsistent list size"); + RAFT_EXPECTS((centers_.extent(0) == list_sizes_.extent(0)) && + (!center_norms_.has_value() || centers_.extent(0) == center_norms_->extent(0)), + "inconsistent number of lists (clusters)"); +} + +template struct index; + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index bbddef87e5..8d90ed35b9 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -133,6 +133,13 @@ ConfigureTest( PERCENT 100 ) +ConfigureTest( + NAME NEIGHBORS_ANN_IVF_SQ_TEST + PATH neighbors/ann_ivf_sq/test_float_uint8_t.cu + GPUS 1 + PERCENT 100 +) + ConfigureTest( NAME NEIGHBORS_ANN_IVF_PQ_TEST PATH neighbors/ann_ivf_pq/test_float_int64_t.cu neighbors/ann_ivf_pq/test_int8_t_int64_t.cu diff --git a/cpp/tests/neighbors/ann_ivf_sq.cuh b/cpp/tests/neighbors/ann_ivf_sq.cuh new file mode 100644 index 0000000000..a7e02315e4 --- /dev/null +++ b/cpp/tests/neighbors/ann_ivf_sq.cuh @@ -0,0 +1,457 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include "../test_utils.cuh" +#include "ann_utils.cuh" +#include "naive_knn.cuh" + +#include +#include +#include +#include + +#include +#include + +namespace cuvs::neighbors::ivf_sq { + +struct test_ivf_sample_filter { + static constexpr unsigned offset = 300; +}; + +template +struct AnnIvfSqInputs { + IdxT num_queries; + IdxT num_db_vecs; + IdxT dim; + IdxT k; + IdxT nprobe; + IdxT nlist; + cuvs::distance::DistanceType metric; + bool adaptive_centers; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const AnnIvfSqInputs& p) +{ + os << "{ " << p.num_queries << ", " << p.num_db_vecs << ", " << p.dim << ", " << p.k << ", " + << p.nprobe << ", " << p.nlist << ", " + << cuvs::neighbors::print_metric{static_cast((int)p.metric)} + << ", " << p.adaptive_centers << '}' << std::endl; + return os; +} + +template +class AnnIVFSQTest : public ::testing::TestWithParam> { + public: + AnnIVFSQTest() + : stream_(raft::resource::get_cuda_stream(handle_)), + ps(::testing::TestWithParam>::GetParam()), + database(0, stream_), + search_queries(0, stream_) + { + } + + void testIVFSQ() + { + size_t queries_size = ps.num_queries * ps.k; + std::vector indices_ivfsq(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_ivfsq(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + cuvs::neighbors::naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database.data(), + ps.num_queries, + ps.num_db_vecs, + ps.dim, + ps.k, + ps.metric); + raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + + { + double min_recall = + std::min(1.0, static_cast(ps.nprobe) / static_cast(ps.nlist)); + + rmm::device_uvector distances_ivfsq_dev(queries_size, stream_); + rmm::device_uvector indices_ivfsq_dev(queries_size, stream_); + + { + cuvs::neighbors::ivf_sq::index_params index_params; + cuvs::neighbors::ivf_sq::search_params search_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + search_params.n_probes = ps.nprobe; + + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 0.5; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.num_db_vecs, ps.dim); + + auto idx = cuvs::neighbors::ivf_sq::build(handle_, index_params, database_view); + + // Test extend: build without data, then extend + cuvs::neighbors::ivf_sq::index_params index_params_no_add; + index_params_no_add.n_lists = ps.nlist; + index_params_no_add.metric = ps.metric; + index_params_no_add.adaptive_centers = ps.adaptive_centers; + index_params_no_add.add_data_on_build = false; + index_params_no_add.kmeans_trainset_fraction = 0.5; + + auto idx_empty = + cuvs::neighbors::ivf_sq::build(handle_, index_params_no_add, database_view); + + auto vector_indices = raft::make_device_vector(handle_, ps.num_db_vecs); + raft::linalg::map_offset(handle_, vector_indices.view(), raft::identity_op{}); + raft::resource::sync_stream(handle_); + + auto indices_view = raft::make_device_vector_view( + vector_indices.data_handle(), ps.num_db_vecs); + cuvs::neighbors::ivf_sq::extend( + handle_, + database_view, + std::make_optional>(indices_view), + &idx_empty); + + // Serialize / deserialize round-trip + tmp_index_file index_file; + cuvs::neighbors::ivf_sq::serialize(handle_, index_file.filename, idx); + cuvs::neighbors::ivf_sq::index index_loaded(handle_); + cuvs::neighbors::ivf_sq::deserialize(handle_, index_file.filename, &index_loaded); + ASSERT_EQ(idx.size(), index_loaded.size()); + ASSERT_EQ(idx.dim(), index_loaded.dim()); + ASSERT_EQ(idx.n_lists(), index_loaded.n_lists()); + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.num_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_ivfsq_dev.data(), ps.num_queries, ps.k); + auto dists_out_view = + raft::make_device_matrix_view(distances_ivfsq_dev.data(), ps.num_queries, ps.k); + + cuvs::neighbors::ivf_sq::search(handle_, + search_params, + index_loaded, + search_queries_view, + indices_out_view, + dists_out_view); + + raft::update_host( + distances_ivfsq.data(), distances_ivfsq_dev.data(), queries_size, stream_); + raft::update_host(indices_ivfsq.data(), indices_ivfsq_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + // SQ introduces quantization error, so we relax the distance epsilon + float eps = 0.1; + ASSERT_TRUE(eval_neighbours(indices_naive, + indices_ivfsq, + distances_naive, + distances_ivfsq, + ps.num_queries, + ps.k, + eps, + min_recall)); + } + } + + void testFilter() + { + if (ps.num_db_vecs <= static_cast(test_ivf_sample_filter::offset)) { + GTEST_SKIP() << "Skipping filter test: num_db_vecs <= filter offset"; + } + + size_t queries_size = ps.num_queries * ps.k; + std::vector indices_ivfsq(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_ivfsq(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + auto* database_filtered_ptr = database.data() + test_ivf_sample_filter::offset * ps.dim; + cuvs::neighbors::naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database_filtered_ptr, + ps.num_queries, + ps.num_db_vecs - test_ivf_sample_filter::offset, + ps.dim, + ps.k, + ps.metric); + raft::linalg::addScalar(indices_naive_dev.data(), + indices_naive_dev.data(), + IdxT(test_ivf_sample_filter::offset), + queries_size, + stream_); + raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + + { + double min_recall = + std::min(1.0, static_cast(ps.nprobe) / static_cast(ps.nlist)); + + rmm::device_uvector distances_ivfsq_dev(queries_size, stream_); + rmm::device_uvector indices_ivfsq_dev(queries_size, stream_); + + { + cuvs::neighbors::ivf_sq::index_params index_params; + cuvs::neighbors::ivf_sq::search_params search_params; + index_params.n_lists = ps.nlist; + index_params.metric = ps.metric; + index_params.adaptive_centers = ps.adaptive_centers; + search_params.n_probes = ps.nprobe; + + index_params.add_data_on_build = true; + index_params.kmeans_trainset_fraction = 0.5; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.num_db_vecs, ps.dim); + auto index = cuvs::neighbors::ivf_sq::build(handle_, index_params, database_view); + + auto removed_indices = + raft::make_device_vector(handle_, test_ivf_sample_filter::offset); + raft::linalg::map_offset(handle_, removed_indices.view(), raft::identity_op{}); + raft::resource::sync_stream(handle_); + + cuvs::core::bitset removed_indices_bitset( + handle_, removed_indices.view(), ps.num_db_vecs); + auto bitset_filter_obj = + cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset.view()); + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.num_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_ivfsq_dev.data(), ps.num_queries, ps.k); + auto dists_out_view = + raft::make_device_matrix_view(distances_ivfsq_dev.data(), ps.num_queries, ps.k); + + cuvs::neighbors::ivf_sq::search(handle_, + search_params, + index, + search_queries_view, + indices_out_view, + dists_out_view, + bitset_filter_obj); + + raft::update_host( + distances_ivfsq.data(), distances_ivfsq_dev.data(), queries_size, stream_); + raft::update_host(indices_ivfsq.data(), indices_ivfsq_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + float eps = 0.1; + ASSERT_TRUE(eval_neighbours(indices_naive, + indices_ivfsq, + distances_naive, + distances_ivfsq, + ps.num_queries, + ps.k, + eps, + min_recall)); + } + } + + void SetUp() override + { + database.resize(ps.num_db_vecs * ps.dim, stream_); + search_queries.resize(ps.num_queries * ps.dim, stream_); + + raft::random::RngState r(1234ULL); + if constexpr (std::is_same_v || std::is_same_v) { + raft::random::uniform( + handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(0.1), DataT(2.0)); + raft::random::uniform( + handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(0.1), DataT(2.0)); + } + raft::resource::sync_stream(handle_); + } + + void TearDown() override + { + raft::resource::sync_stream(handle_); + database.resize(0, stream_); + search_queries.resize(0, stream_); + } + + private: + raft::resources handle_; + rmm::cuda_stream_view stream_; + AnnIvfSqInputs ps; + rmm::device_uvector database; + rmm::device_uvector search_queries; +}; + +const std::vector> inputs = { + // num_queries, num_db_vecs, dim, k, nprobe, nlist, metric, adaptive_centers + + // ===== Dimension edge cases (all four metrics) ===== + // dim=1 (CosineExpanded excluded: requires dim > 1) + {1000, 10000, 1, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 1, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 1, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + // dim=2,3,4,5 (unaligned) + {1000, 10000, 2, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 2, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 3, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 3, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {1000, 10000, 4, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 4, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 5, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 5, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // dim=7,8 (around veclen=16 boundary, not a multiple of veclen) + {1000, 10000, 7, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 7, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + // dim=15,16,17 (around veclen=16 boundary) + {1000, 10000, 15, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 15, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + {1000, 10000, 17, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 17, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // dim=31,32,33 (around 2*veclen boundary) + {1000, 10000, 31, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 31, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 32, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 32, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 32, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 33, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 33, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + // medium dims + {1000, 10000, 64, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 64, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + {1000, 10000, 256, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 256, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + // large dims (may exceed shared memory limits) + {1000, 10000, 2048, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 2048, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 2049, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 2049, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 2050, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 2050, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + + // ===== k edge cases ===== + {1000, 10000, 16, 1, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 1, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 1, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 2, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 5, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 20, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 20, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 50, 100, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 100, 200, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 100, 200, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + + // ===== nprobe / nlist edge cases ===== + // nprobe == nlist (exhaustive probe) + {1000, 10000, 16, 10, 64, 64, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 64, 64, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 10, 64, 64, cuvs::distance::DistanceType::CosineExpanded, false}, + // nprobe == 1 (minimal probe) + {1000, 10000, 16, 10, 1, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 1, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // nprobe > nlist (clamped to nlist) + {1000, 10000, 16, 10, 2048, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 2048, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // various nprobe + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::InnerProduct, false}, + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + // very small nlist + {100, 10000, 16, 10, 8, 8, cuvs::distance::DistanceType::L2Expanded, false}, + {100, 10000, 16, 10, 8, 8, cuvs::distance::DistanceType::CosineExpanded, false}, + // smaller nlist + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::L2Expanded, false}, + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::InnerProduct, false}, + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::CosineExpanded, false}, + {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + + // ===== Dataset size edge cases ===== + // single query + {1, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {1, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // very few queries + {2, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {5, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + // very few db vectors (nlist reduced to fit) + {100, 500, 16, 10, 40, 256, cuvs::distance::DistanceType::L2Expanded, false}, + {100, 500, 16, 10, 40, 256, cuvs::distance::DistanceType::CosineExpanded, false}, + // small db with many empty clusters + {100, 100, 16, 5, 20, 64, cuvs::distance::DistanceType::L2Expanded, false}, + {100, 100, 16, 5, 20, 64, cuvs::distance::DistanceType::CosineExpanded, false}, + // larger datasets + {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false}, + {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false}, + {10000, 131072, 8, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {10000, 131072, 8, 10, 50, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + + // ===== Large query batches (gridDim.x > 65535) ===== + {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::L2Expanded, false}, + {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::InnerProduct, false}, + {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::CosineExpanded, false}, + {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::L2SqrtExpanded, false}, + {100000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::L2Expanded, false}, + {100000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::CosineExpanded, false}, + // just above the old 65535 limit + {65536, 1024, 16, 10, 32, 64, cuvs::distance::DistanceType::L2Expanded, false}, + {65536, 1024, 16, 10, 32, 64, cuvs::distance::DistanceType::CosineExpanded, false}, + + // ===== Adaptive centers (all four metrics, multiple dims) ===== + {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, true}, + {1000, 10000, 32, 10, 50, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 32, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, true}, + {1000, 10000, 32, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true}, + {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true}, + + // ===== Recall-stability: same data, different query counts ===== + {20000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::L2Expanded, false}, + {50000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::L2Expanded, false}, +}; + +} // namespace cuvs::neighbors::ivf_sq diff --git a/cpp/tests/neighbors/ann_ivf_sq/test_float_uint8_t.cu b/cpp/tests/neighbors/ann_ivf_sq/test_float_uint8_t.cu new file mode 100644 index 0000000000..02ec8a7dfc --- /dev/null +++ b/cpp/tests/neighbors/ann_ivf_sq/test_float_uint8_t.cu @@ -0,0 +1,21 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "../ann_ivf_sq.cuh" + +namespace cuvs::neighbors::ivf_sq { + +typedef AnnIVFSQTest AnnIVFSQTestF_float; +TEST_P(AnnIVFSQTestF_float, AnnIVFSQ) +{ + this->testIVFSQ(); + this->testFilter(); +} + +INSTANTIATE_TEST_CASE_P(AnnIVFSQTest, AnnIVFSQTestF_float, ::testing::ValuesIn(inputs)); + +} // namespace cuvs::neighbors::ivf_sq diff --git a/docs/source/python_api/neighbors.rst b/docs/source/python_api/neighbors.rst index 909f2013ad..614dc0b8e1 100644 --- a/docs/source/python_api/neighbors.rst +++ b/docs/source/python_api/neighbors.rst @@ -17,6 +17,7 @@ Single-GPU Algorithms neighbors_hnsw.rst neighbors_ivf_flat.rst neighbors_ivf_pq.rst + neighbors_ivf_sq.rst neighbors_nn_decent.rst Multi-GPU Algorithms diff --git a/docs/source/python_api/neighbors_ivf_sq.rst b/docs/source/python_api/neighbors_ivf_sq.rst new file mode 100644 index 0000000000..7a1b8e2ac9 --- /dev/null +++ b/docs/source/python_api/neighbors_ivf_sq.rst @@ -0,0 +1,49 @@ +IVF-SQ +====== + +.. role:: py(code) + :language: python + :class: highlight + +Index build parameters +###################### + +.. autoclass:: cuvs.neighbors.ivf_sq.IndexParams + :members: + +Index search parameters +####################### + +.. autoclass:: cuvs.neighbors.ivf_sq.SearchParams + :members: + +Index +##### + +.. autoclass:: cuvs.neighbors.ivf_sq.Index + :members: + +Index build +########### + +.. autofunction:: cuvs.neighbors.ivf_sq.build + +Index search +############ + +.. autofunction:: cuvs.neighbors.ivf_sq.search + +Index save +########## + +.. autofunction:: cuvs.neighbors.ivf_sq.save + +Index load +########## + +.. autofunction:: cuvs.neighbors.ivf_sq.load + +Index extend +############ + +.. autofunction:: cuvs.neighbors.ivf_sq.extend diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt index 6a48508be5..8124962cd4 100644 --- a/python/cuvs/cuvs/neighbors/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # @@ -9,6 +9,7 @@ add_subdirectory(cagra) add_subdirectory(hnsw) add_subdirectory(ivf_flat) add_subdirectory(ivf_pq) +add_subdirectory(ivf_sq) add_subdirectory(filters) add_subdirectory(nn_descent) add_subdirectory(tiered_index) diff --git a/python/cuvs/cuvs/neighbors/__init__.py b/python/cuvs/cuvs/neighbors/__init__.py index 016f98ce60..6b4966bc18 100644 --- a/python/cuvs/cuvs/neighbors/__init__.py +++ b/python/cuvs/cuvs/neighbors/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 @@ -9,6 +9,7 @@ filters, ivf_flat, ivf_pq, + ivf_sq, mg, nn_descent, vamana, @@ -22,6 +23,7 @@ "filters", "ivf_flat", "ivf_pq", + "ivf_sq", "mg", "nn_descent", "all_neighbors", diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_sq/CMakeLists.txt new file mode 100644 index 0000000000..5fe85c9de5 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/ivf_sq/CMakeLists.txt @@ -0,0 +1,17 @@ +# ============================================================================= +# cmake-format: off +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# cmake-format: on +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources ivf_sq.pyx) +set(linked_libraries cuvs::cuvs cuvs::c_api) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_ivf_sq_ +) diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/__init__.py b/python/cuvs/cuvs/neighbors/ivf_sq/__init__.py new file mode 100644 index 0000000000..bec1a652db --- /dev/null +++ b/python/cuvs/cuvs/neighbors/ivf_sq/__init__.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + + +from .ivf_sq import ( + Index, + IndexParams, + SearchParams, + build, + extend, + load, + save, + search, +) + +__all__ = [ + "Index", + "IndexParams", + "SearchParams", + "build", + "extend", + "load", + "save", + "search", +] diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pxd b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pxd new file mode 100644 index 0000000000..99617f7d0c --- /dev/null +++ b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pxd @@ -0,0 +1,102 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# +# cython: language_level=3 + +from libc.stdint cimport int64_t, uint32_t, uintptr_t +from libcpp cimport bool + +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor +from cuvs.distance_type cimport cuvsDistanceType +from cuvs.neighbors.filters.filters cimport cuvsFilter + + +cdef extern from "cuvs/neighbors/ivf_sq.h" nogil: + + ctypedef struct cuvsIvfSqIndexParams: + cuvsDistanceType metric + float metric_arg + bool add_data_on_build + uint32_t n_lists + uint32_t kmeans_n_iters + double kmeans_trainset_fraction + bool adaptive_centers + bool conservative_memory_allocation + + ctypedef cuvsIvfSqIndexParams* cuvsIvfSqIndexParams_t + + ctypedef struct cuvsIvfSqSearchParams: + uint32_t n_probes + + ctypedef cuvsIvfSqSearchParams* cuvsIvfSqSearchParams_t + + ctypedef struct cuvsIvfSqIndex: + uintptr_t addr + DLDataType dtype + + ctypedef cuvsIvfSqIndex* cuvsIvfSqIndex_t + + cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* params) + + cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index) + + cuvsError_t cuvsIvfSqSearchParamsCreate( + cuvsIvfSqSearchParams_t* params) + + cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t index) + + cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index) + + cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index) + + cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, + int64_t * n_lists) + + cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t * dim) + + cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t * size) + + cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, + DLManagedTensor * centers) + + cuvsError_t cuvsIvfSqBuild(cuvsResources_t res, + cuvsIvfSqIndexParams* params, + DLManagedTensor* dataset, + cuvsIvfSqIndex_t index) except + + + cuvsError_t cuvsIvfSqSearch(cuvsResources_t res, + cuvsIvfSqSearchParams* params, + cuvsIvfSqIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances) except + + + cuvsError_t cuvsIvfSqSearchWithFilter(cuvsResources_t res, + cuvsIvfSqSearchParams* params, + cuvsIvfSqIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances, + cuvsFilter filter) except + + + cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, + const char * filename, + cuvsIvfSqIndex_t index) except + + + cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res, + const char * filename, + cuvsIvfSqIndex_t index) except + + + cuvsError_t cuvsIvfSqExtend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex_t index) + + +cdef class IndexParams: + cdef cuvsIvfSqIndexParams* params + +cdef class SearchParams: + cdef cuvsIvfSqSearchParams* params diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pyx b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pyx new file mode 100644 index 0000000000..7f8f2b0897 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pyx @@ -0,0 +1,547 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# +# cython: language_level=3 + +import numpy as np + +cimport cuvs.common.cydlpack + +from cuvs.common.resources import auto_sync_resources + +from cython.operator cimport dereference as deref +from libcpp cimport bool, cast +from libcpp.string cimport string + +from cuvs.common cimport cydlpack +from cuvs.distance_type cimport cuvsDistanceType + +from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray +from pylibraft.common.cai_wrapper import wrap_array +from pylibraft.common.interruptible import cuda_interruptible + +from cuvs.common.device_tensor_view import DeviceTensorView +from cuvs.distance import DISTANCE_NAMES, DISTANCE_TYPES +from cuvs.neighbors.common import _check_input_array +from cuvs.neighbors.filters import no_filter + +from libc.stdint cimport ( + int8_t, + int64_t, + uint8_t, + uint32_t, + uint64_t, + uintptr_t, +) + +from cuvs.common.exceptions import check_cuvs + + +cdef class IndexParams: + """ + Parameters to build index for IvfSq nearest neighbor search + + Parameters + ---------- + n_lists : int, default = 1024 + The number of clusters used in the coarse quantizer. + metric : str, default = "sqeuclidean" + String denoting the metric type. + Valid values for metric: ["sqeuclidean", "inner_product", + "euclidean", "cosine"], where + + - sqeuclidean is the euclidean distance without the square root + operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2, + - euclidean is the euclidean distance + - inner product distance is defined as + distance(a, b) = \\sum_i a_i * b_i. + - cosine distance is defined as + distance(a, b) = 1 - \\sum_i a_i * b_i / ( ||a||_2 * ||b||_2). + + kmeans_n_iters : int, default = 20 + The number of iterations searching for kmeans centers during index + building. + kmeans_trainset_fraction : int, default = 0.5 + If kmeans_trainset_fraction is less than 1, then the dataset is + subsampled, and only n_samples * kmeans_trainset_fraction rows + are used for training. + add_data_on_build : bool, default = True + After training the coarse and fine quantizers, we will populate + the index with the dataset if add_data_on_build == True, otherwise + the index is left empty, and the extend method can be used + to add new vectors to the index. + adaptive_centers : bool, default = False + By default (adaptive_centers = False), the cluster centers are + trained in `ivf_sq.build`, and never modified in + `ivf_sq.extend`. The alternative behavior (adaptive_centers + = true) is to update the cluster centers for new data when it is + added. In this case, `index.centers()` are always exactly the + centroids of the data in the corresponding clusters. The drawback + of this behavior is that the centroids depend on the order of + adding new data (through the classification of the added data); + that is, `index.centers()` "drift" together with the changing + distribution of the newly added data. + conservative_memory_allocation : bool, default = False + By default, the algorithm allocates more space than necessary for + individual clusters (`list_data`). This allows to amortize the cost + of memory allocation and reduce the number of data copies during + repeated calls to `extend` (extending the database). + To disable this behavior and use as little GPU memory for the + database as possible, set this flag to `True`. + """ + + def __cinit__(self): + cuvsIvfSqIndexParamsCreate(&self.params) + + def __dealloc__(self): + if self.params != NULL: + check_cuvs(cuvsIvfSqIndexParamsDestroy(self.params)) + + def __init__(self, *, + n_lists=1024, + metric="sqeuclidean", + metric_arg=2.0, + kmeans_n_iters=20, + kmeans_trainset_fraction=0.5, + adaptive_centers=False, + add_data_on_build=True, + conservative_memory_allocation=False): + self.params.metric = DISTANCE_TYPES[metric] + self.params.metric_arg = metric_arg + self.params.add_data_on_build = add_data_on_build + self.params.n_lists = n_lists + self.params.kmeans_n_iters = kmeans_n_iters + self.params.kmeans_trainset_fraction = kmeans_trainset_fraction + self.params.adaptive_centers = adaptive_centers + self.params.conservative_memory_allocation = \ + conservative_memory_allocation + + def get_handle(self): + return self.params + + @property + def metric(self): + return DISTANCE_NAMES[self.params.metric] + + @property + def metric_arg(self): + return self.params.metric_arg + + @property + def add_data_on_build(self): + return self.params.add_data_on_build + + @property + def n_lists(self): + return self.params.n_lists + + @property + def kmeans_n_iters(self): + return self.params.kmeans_n_iters + + @property + def kmeans_trainset_fraction(self): + return self.params.kmeans_trainset_fraction + + @property + def adaptive_centers(self): + return self.params.adaptive_centers + + @property + def conservative_memory_allocation(self): + return self.params.conservative_memory_allocation + + +cdef class Index: + """ + IvfSq index object. This object stores the trained IvfSq index state + which can be used to perform nearest neighbors searches. + """ + + cdef cuvsIvfSqIndex_t index + cdef bool trained + + def __cinit__(self): + self.trained = False + check_cuvs(cuvsIvfSqIndexCreate(&self.index)) + + def __dealloc__(self): + check_cuvs(cuvsIvfSqIndexDestroy(self.index)) + + @property + def trained(self): + return self.trained + + def __repr__(self): + return "Index(type=IvfSq)" + + @property + def n_lists(self): + """ The number of inverted lists (clusters) """ + cdef int64_t n_lists = 0 + cuvsIvfSqIndexGetNLists(self.index, &n_lists) + return n_lists + + @property + def dim(self): + """ dimensionality of the cluster centers """ + cdef int64_t dim = 0 + cuvsIvfSqIndexGetDim(self.index, &dim) + return dim + + def __len__(self): + cdef int64_t size = 0 + check_cuvs(cuvsIvfSqIndexGetSize(self.index, &size)) + return size + + @property + def centers(self): + """ Get the cluster centers corresponding to the lists in the + original space """ + if not self.trained: + raise ValueError("Index needs to be built before getting centers") + + output = DeviceTensorView() + cdef cydlpack.DLManagedTensor * tensor = \ + output.get_handle() + check_cuvs(cuvsIvfSqIndexGetCenters(self.index, tensor)) + output.parent = self + + return output + + +@auto_sync_resources +def build(IndexParams index_params, dataset, resources=None): + """ + Build the IvfSq index from the dataset for efficient search. + + IVF-SQ (Scalar Quantization) combines an IVF coarse quantizer with + per-dimension scalar quantization. Each vector's residual is encoded + as one byte per dimension, providing ~4x memory reduction vs IVF-Flat + with higher recall than IVF-PQ at similar memory budgets. + + Parameters + ---------- + index_params : :py:class:`cuvs.neighbors.ivf_sq.IndexParams` + dataset : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float32, float16] + {resources_docstring} + + Returns + ------- + index: py:class:`cuvs.neighbors.ivf_sq.Index` + + Examples + -------- + + >>> import cupy as cp + >>> from cuvs.neighbors import ivf_sq + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> k = 10 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> build_params = ivf_sq.IndexParams(metric="sqeuclidean") + >>> index = ivf_sq.build(build_params, dataset) + >>> distances, neighbors = ivf_sq.search(ivf_sq.SearchParams(), + ... index, dataset, + ... k) + >>> distances = cp.asarray(distances) + >>> neighbors = cp.asarray(neighbors) + """ + + dataset_ai = wrap_array(dataset) + _check_input_array(dataset_ai, [np.dtype('float32'), + np.dtype('float16')]) + + cdef Index idx = Index() + cdef cydlpack.DLManagedTensor* dataset_dlpack = \ + cydlpack.dlpack_c(dataset_ai) + cdef cuvsIvfSqIndexParams* params = index_params.params + + cdef cuvsResources_t res = resources.get_c_obj() + + with cuda_interruptible(): + check_cuvs(cuvsIvfSqBuild( + res, + params, + dataset_dlpack, + idx.index + )) + idx.trained = True + + return idx + + +cdef class SearchParams: + """ + Supplemental parameters to search IVF-SQ index + + Parameters + ---------- + n_probes: int + The number of clusters to search. + """ + + def __cinit__(self): + cuvsIvfSqSearchParamsCreate(&self.params) + + def __dealloc__(self): + if self.params != NULL: + check_cuvs(cuvsIvfSqSearchParamsDestroy(self.params)) + + def __init__(self, *, n_probes=20): + self.params.n_probes = n_probes + + def get_handle(self): + return self.params + + @property + def n_probes(self): + return self.params.n_probes + + +@auto_sync_resources +@auto_convert_output +def search(SearchParams search_params, + Index index, + queries, + k, + neighbors=None, + distances=None, + resources=None, + filter=None): + """ + Find the k nearest neighbors for each query. + + Parameters + ---------- + search_params : py:class:`cuvs.neighbors.ivf_sq.SearchParams` + index : py:class:`cuvs.neighbors.ivf_sq.Index` + Trained IvfSq index. + queries : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float32, float16] + k : int + The number of neighbors. + neighbors : Optional CUDA array interface compliant matrix shape + (n_queries, k), dtype int64_t. If supplied, neighbor + indices will be written here in-place. (default None) + distances : Optional CUDA array interface compliant matrix shape + (n_queries, k) If supplied, the distances to the + neighbors will be written here in-place. (default None) + filter: Optional cuvs.neighbors.cuvsFilter can be used to filter + neighbors based on a given bitset. (default None) + {resources_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from cuvs.neighbors import ivf_sq + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build the index + >>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset) + >>> + >>> # Search using the built index + >>> queries = cp.random.random_sample((n_queries, n_features), + ... dtype=cp.float32) + >>> k = 10 + >>> search_params = ivf_sq.SearchParams(n_probes=20) + >>> + >>> distances, neighbors = ivf_sq.search(search_params, index, queries, + ... k) + """ + if not index.trained: + raise ValueError("Index needs to be built before calling search.") + + queries_cai = wrap_array(queries) + _check_input_array(queries_cai, [np.dtype('float32'), + np.dtype('float16')]) + + cdef uint32_t n_queries = queries_cai.shape[0] + + if neighbors is None: + neighbors = device_ndarray.empty((n_queries, k), dtype='int64') + + neighbors_cai = wrap_array(neighbors) + _check_input_array(neighbors_cai, [np.dtype('int64')], + exp_rows=n_queries, exp_cols=k) + + if distances is None: + distances = device_ndarray.empty((n_queries, k), dtype='float32') + + distances_cai = wrap_array(distances) + _check_input_array(distances_cai, [np.dtype('float32')], + exp_rows=n_queries, exp_cols=k) + + cdef cuvsIvfSqSearchParams* params = search_params.params + cdef cydlpack.DLManagedTensor* queries_dlpack = \ + cydlpack.dlpack_c(queries_cai) + cdef cydlpack.DLManagedTensor* neighbors_dlpack = \ + cydlpack.dlpack_c(neighbors_cai) + cdef cydlpack.DLManagedTensor* distances_dlpack = \ + cydlpack.dlpack_c(distances_cai) + cdef cuvsResources_t res = resources.get_c_obj() + + if filter is None: + filter = no_filter() + + with cuda_interruptible(): + check_cuvs(cuvsIvfSqSearchWithFilter( + res, + params, + index.index, + queries_dlpack, + neighbors_dlpack, + distances_dlpack, + filter.prefilter + )) + + return (distances, neighbors) + + +@auto_sync_resources +def save(filename, Index index, bool include_dataset=True, resources=None): + """ + Saves the index to a file. + + Saving / loading the index is experimental. The serialization format is + subject to change. + + Parameters + ---------- + filename : string + Name of the file. + index : Index + Trained IVF-SQ index. + {resources_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from cuvs.neighbors import ivf_sq + >>> n_samples = 50000 + >>> n_features = 50 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset) + >>> # Serialize and deserialize the ivf_sq index built + >>> ivf_sq.save("my_index.bin", index) + >>> index_loaded = ivf_sq.load("my_index.bin") + """ + cdef string c_filename = filename.encode('utf-8') + cdef cuvsResources_t res = resources.get_c_obj() + check_cuvs(cuvsIvfSqSerialize(res, + c_filename.c_str(), + index.index)) + + +@auto_sync_resources +def load(filename, resources=None): + """ + Loads index from file. + + Saving / loading the index is experimental. The serialization format is + subject to change, therefore loading an index saved with a previous + version of cuvs is not guaranteed to work. + + Parameters + ---------- + filename : string + Name of the file. + {resources_docstring} + + Returns + ------- + index : Index + + """ + cdef Index idx = Index() + cdef cuvsResources_t res = resources.get_c_obj() + cdef string c_filename = filename.encode('utf-8') + + check_cuvs(cuvsIvfSqDeserialize( + res, + c_filename.c_str(), + idx.index + )) + idx.trained = True + return idx + + +@auto_sync_resources +def extend(Index index, new_vectors, new_indices, resources=None): + """ + Extend an existing index with new vectors. + + The input array can be either CUDA array interface compliant matrix or + array interface compliant matrix in host memory. + + + Parameters + ---------- + index : ivf_sq.Index + Trained ivf_sq object. + new_vectors : array interface compliant matrix shape (n_samples, dim) + Supported dtype [float32, float16] + new_indices : array interface compliant vector shape (n_samples) + Supported dtype [int64] + {resources_docstring} + + Returns + ------- + index: py:class:`cuvs.neighbors.ivf_sq.Index` + + Examples + -------- + + >>> import cupy as cp + >>> from cuvs.neighbors import ivf_sq + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset) + >>> n_rows = 100 + >>> more_data = cp.random.random_sample((n_rows, n_features), + ... dtype=cp.float32) + >>> indices = n_samples + cp.arange(n_rows, dtype=cp.int64) + >>> index = ivf_sq.extend(index, more_data, indices) + >>> # Search using the built index + >>> queries = cp.random.random_sample((n_queries, n_features), + ... dtype=cp.float32) + >>> distances, neighbors = ivf_sq.search(ivf_sq.SearchParams(), + ... index, queries, + ... k=10) + """ + + new_vectors_ai = wrap_array(new_vectors) + _check_input_array(new_vectors_ai, + [np.dtype('float32'), np.dtype('float16')]) + + new_indices_ai = wrap_array(new_indices) + _check_input_array(new_indices_ai, [np.dtype('int64')]) + cdef cuvsResources_t res = resources.get_c_obj() + + cdef cydlpack.DLManagedTensor* new_vectors_dlpack = \ + cydlpack.dlpack_c(new_vectors_ai) + + cdef cydlpack.DLManagedTensor* new_indices_dlpack = \ + cydlpack.dlpack_c(new_indices_ai) + + with cuda_interruptible(): + check_cuvs(cuvsIvfSqExtend( + res, + new_vectors_dlpack, + new_indices_dlpack, + index.index + )) + + return index diff --git a/python/cuvs/cuvs/tests/test_ivf_sq.py b/python/cuvs/cuvs/tests/test_ivf_sq.py new file mode 100644 index 0000000000..f4a03fd14b --- /dev/null +++ b/python/cuvs/cuvs/tests/test_ivf_sq.py @@ -0,0 +1,143 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# + +import tempfile + +import numpy as np +import pytest +from pylibraft.common import device_ndarray +from sklearn.neighbors import NearestNeighbors +from sklearn.preprocessing import normalize + +from cuvs.neighbors import ivf_sq +from cuvs.tests.ann_utils import ( + calc_recall, + generate_data, + run_filtered_search_test, +) + + +def run_ivf_sq_build_search_test( + n_rows=10000, + n_cols=10, + n_queries=100, + k=10, + dtype=np.float32, + add_data_on_build=True, + metric="euclidean", + compare=True, + inplace=True, + search_params={}, + serialize=False, +): + dataset = generate_data((n_rows, n_cols), dtype) + if metric == "inner_product": + dataset = normalize(dataset, norm="l2", axis=1) + dataset_device = device_ndarray(dataset) + + build_params = ivf_sq.IndexParams( + metric=metric, + add_data_on_build=add_data_on_build, + ) + + index = ivf_sq.build(build_params, dataset_device) + + if serialize: + with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f: + temp_filename = f.name + ivf_sq.save(temp_filename, index) + index = ivf_sq.load(temp_filename) + + if not add_data_on_build: + dataset_1 = dataset[: n_rows // 2, :] + dataset_2 = dataset[n_rows // 2 :, :] + indices_1 = np.arange(n_rows // 2, dtype=np.int64) + indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.int64) + + dataset_1_device = device_ndarray(dataset_1) + dataset_2_device = device_ndarray(dataset_2) + indices_1_device = device_ndarray(indices_1) + indices_2_device = device_ndarray(indices_2) + index = ivf_sq.extend(index, dataset_1_device, indices_1_device) + index = ivf_sq.extend(index, dataset_2_device, indices_2_device) + + queries = generate_data((n_queries, n_cols), dtype) + out_idx = np.zeros((n_queries, k), dtype=np.int64) + out_dist = np.zeros((n_queries, k), dtype=np.float32) + + queries_device = device_ndarray(queries) + out_idx_device = device_ndarray(out_idx) if inplace else None + out_dist_device = device_ndarray(out_dist) if inplace else None + + search_params = ivf_sq.SearchParams(**search_params) + + ret_output = ivf_sq.search( + search_params, + index, + queries_device, + k, + neighbors=out_idx_device, + distances=out_dist_device, + ) + + if not inplace: + out_dist_device, out_idx_device = ret_output + + if not compare: + return + + out_idx = out_idx_device.copy_to_host() + out_dist = out_dist_device.copy_to_host() + + skl_metric = { + "sqeuclidean": "sqeuclidean", + "inner_product": "cosine", + "cosine": "cosine", + "euclidean": "euclidean", + }[metric] + nn_skl = NearestNeighbors( + n_neighbors=k, algorithm="brute", metric=skl_metric + ) + nn_skl.fit(dataset) + skl_idx = nn_skl.kneighbors(queries, return_distance=False) + + recall = calc_recall(out_idx, skl_idx) + assert recall > 0.7 + + centers = index.centers + assert centers.shape[0] == build_params.n_lists + assert centers.shape[1] == n_cols + + +@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize("dtype", [np.float32]) +@pytest.mark.parametrize( + "metric", ["sqeuclidean", "inner_product", "euclidean", "cosine"] +) +def test_ivf_sq(inplace, dtype, metric): + run_ivf_sq_build_search_test( + dtype=dtype, + inplace=inplace, + metric=metric, + ) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float16]) +@pytest.mark.parametrize("serialize", [True, False]) +def test_extend(dtype, serialize): + run_ivf_sq_build_search_test( + n_rows=10000, + n_cols=10, + n_queries=100, + k=10, + metric="sqeuclidean", + dtype=dtype, + add_data_on_build=False, + serialize=serialize, + ) + + +@pytest.mark.parametrize("sparsity", [0.5, 0.7, 1.0]) +def test_filtered_ivf_sq(sparsity): + run_filtered_search_test(ivf_sq, sparsity) diff --git a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml index fa2195fc61..3a787f65ab 100644 --- a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml @@ -34,6 +34,9 @@ cuvs_ivf_flat: cuvs_ivf_pq: executable: CUVS_IVF_PQ_ANN_BENCH requires_gpu: true +cuvs_ivf_sq: + executable: CUVS_IVF_SQ_ANN_BENCH + requires_gpu: true cuvs_cagra: executable: CUVS_CAGRA_ANN_BENCH requires_gpu: true diff --git a/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py b/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py index 9111bdc3b9..ea2afe351e 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py +++ b/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 @@ -50,6 +50,12 @@ def cuvs_cagra_search(params, build_params, k, batch_size): return True +def cuvs_ivf_sq_search(params, build_params, k, batch_size): + if "nlist" in build_params and "nprobe" in params: + return build_params["nlist"] >= params["nprobe"] + return True + + ############################################################################### # FAISS constraints # ############################################################################### diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml new file mode 100644 index 0000000000..adaad54e04 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml @@ -0,0 +1,25 @@ +name: cuvs_ivf_sq +constraints: + search: cuvs_bench.config.algos.constraints.cuvs_ivf_sq_search +groups: + base: + build: + nlist: [1024, 2048, 4096, 8192] + ratio: [1, 2] + niter: [25] + search: + nprobe: [1, 5, 10, 20, 50, 100, 200, 500] + large: + build: + nlist: [8192, 16384, 32000, 64000] + ratio: [2, 4] + niter: [20] + search: + nprobe: [10, 20, 50, 100, 200, 500, 1000, 2000] + test: + build: + nlist: [1024] + ratio: [1] + niter: [20] + search: + nprobe: [1, 5]