Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 34 additions & 26 deletions cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,51 +341,59 @@ void parse_build_param(const nlohmann::json& conf, cuvs::neighbors::cagra::index
params.guarantee_connectivity = conf.at("guarantee_connectivity");
}

// Override the graph_build_algo if requested explicitly
if (conf.contains("variable_graph_degree_fraction")) {
params.variable_graph_degree_fraction = conf.at("variable_graph_degree_fraction");
}

// Extract build-algo-specific parameters
nlohmann::json ivf_pq_build_conf = collect_conf_with_prefix(conf, "ivf_pq_build_");
nlohmann::json ivf_pq_search_conf = collect_conf_with_prefix(conf, "ivf_pq_search_");
nlohmann::json nn_descent_conf = collect_conf_with_prefix(conf, "nn_descent_");
nlohmann::json ace_conf = collect_conf_with_prefix(conf, "ace_");

// Determine and initialize graph build algorithm.
// Priority 1: explicit "graph_build_algo" config key.
// Priority 2: infer from algorithm-specific prefixed config keys (only when monostate).
// Priority 3: leave as-is (from prior heuristics or monostate for AUTO at build time).
std::string graph_build_algo;
if (conf.contains("graph_build_algo")) {
if (conf.at("graph_build_algo") == "IVF_PQ") {
graph_build_algo = conf.at("graph_build_algo");
} else if (std::holds_alternative<std::monostate>(params.graph_build_params)) {
if (!ivf_pq_build_conf.empty() || !ivf_pq_search_conf.empty()) {
graph_build_algo = "IVF_PQ";
} else if (!nn_descent_conf.empty()) {
graph_build_algo = "NN_DESCENT";
} else if (!ace_conf.empty()) {
graph_build_algo = "ACE";
}
// else: leave as monostate → AUTO in cagra_build.cuh
}

if (!graph_build_algo.empty()) {
if (graph_build_algo == "IVF_PQ") {
if (!std::holds_alternative<cuvs::neighbors::graph_build_params::ivf_pq_params>(
params.graph_build_params)) {
params.graph_build_params = cuvs::neighbors::graph_build_params::ivf_pq_params{};
}
} else if (conf.at("graph_build_algo") == "NN_DESCENT") {
} else if (graph_build_algo == "NN_DESCENT") {
if (!std::holds_alternative<cuvs::neighbors::graph_build_params::nn_descent_params>(
params.graph_build_params)) {
params.graph_build_params = cuvs::neighbors::graph_build_params::nn_descent_params{};
params.graph_build_params = cuvs::neighbors::graph_build_params::nn_descent_params(
params.intermediate_graph_degree, params.metric);
}
} else if (conf.at("graph_build_algo") == "ACE") {
} else if (graph_build_algo == "ACE") {
if (!std::holds_alternative<cuvs::neighbors::graph_build_params::ace_params>(
params.graph_build_params)) {
params.graph_build_params = cuvs::neighbors::graph_build_params::ace_params{};
}
} else if (conf.at("graph_build_algo") == "ITERATIVE_SEARCH") {
} else if (graph_build_algo == "ITERATIVE_SEARCH") {
if (!std::holds_alternative<cuvs::neighbors::graph_build_params::iterative_search_params>(
params.graph_build_params)) {
params.graph_build_params = cuvs::neighbors::graph_build_params::iterative_search_params{};
}
}
}

// Parse build-algo-specific parameters and use them to decide on the algo type
nlohmann::json ivf_pq_build_conf = collect_conf_with_prefix(conf, "ivf_pq_build_");
nlohmann::json ivf_pq_search_conf = collect_conf_with_prefix(conf, "ivf_pq_search_");
nlohmann::json nn_descent_conf = collect_conf_with_prefix(conf, "nn_descent_");
nlohmann::json ace_conf = collect_conf_with_prefix(conf, "ace_");

// When graph_build_algo is not specified, leave graph_build_params as monostate so the
// CAGRA build uses AUTO selection (NN_DESCENT or IVF_PQ based on dataset/heuristics).
// Only infer from algo-specific config keys when present.
if (std::holds_alternative<std::monostate>(params.graph_build_params)) {
if (!ivf_pq_build_conf.empty() || !ivf_pq_search_conf.empty()) {
params.graph_build_params = cuvs::neighbors::graph_build_params::ivf_pq_params{};
} else if (!nn_descent_conf.empty()) {
params.graph_build_params = cuvs::neighbors::graph_build_params::nn_descent_params{};
} else if (!ace_conf.empty()) {
params.graph_build_params = cuvs::neighbors::graph_build_params::ace_params{};
}
// else: leave as monostate → AUTO in cagra_build.cuh
}

// Apply build-algo-specific parameters
std::visit(
[&](auto& arg) {
Expand Down
26 changes: 26 additions & 0 deletions cpp/include/cuvs/neighbors/cagra.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,37 @@ enum class hnsw_heuristic_type : uint32_t {
SAME_GRAPH_FOOTPRINT = 1
};

/**
 * Sentinel marking an invalid / absent neighbor in a CAGRA graph.
 *
 * The value is `static_cast<IdxT>(-1)`: the maximum representable value for an
 * unsigned `IdxT`, and `-1` for a signed `IdxT`.
 *
 * Variable-degree graphs (see index_params::variable_graph_degree_fraction) pad
 * unused neighbor slots with this value, and consumers should treat it as
 * "end of neighbor list".
 *
 * Declared `inline constexpr` (rather than `static`) so that every translation
 * unit including this header refers to the same entity instead of a private
 * internal-linkage copy.
 *
 * @tparam IdxT integer type used for graph node indices
 */
template <typename IdxT>
inline constexpr IdxT kInvalidNeighbor = static_cast<IdxT>(-1);

struct index_params : cuvs::neighbors::index_params {
/** Degree of input graph for pruning. */
size_t intermediate_graph_degree = 128;
/** Degree of output graph. */
size_t graph_degree = 64;
/**
* Fraction of output graph_degree to define the minimum output graph degree,
* allowing variable-degree neighbor graphs.
*
* This fraction is used as the target for low-detour edges
* during the pruning step. Must be in (0, 1]. The default value of 1.0
* disables variable-degree logic (normal CAGRA behavior). Values < 1.0
* enable variable-degree graphs: the optimize step finds the minimum detour
* threshold that covers at least ceil(graph_degree * fraction) edges per node,
* then lets reverse edges expand the degree further. Unused slots are filled
* with a sentinel value (`kInvalidNeighbor`).
*
* This is intended for the CAGRA-to-HNSW conversion pipeline: the resulting
* graph, when imported into hnswlib, produces variable-degree neighbor lists
* similar to natively-built HNSW graphs. Do not use this with CAGRA's native
* GPU search.
*/
double variable_graph_degree_fraction = 1.0;
/**
* Specify compression parameters if compression is desired. If set, overrides the
* attach_dataset_on_build (and the compressed dataset is always added to the index).
Expand Down
12 changes: 7 additions & 5 deletions cpp/src/neighbors/cagra.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -40,13 +40,15 @@ cagra::index_params index_params::from_hnsw_params(raft::matrix_extent<int64_t>
cagra::index_params params;
switch (heuristic) {
case hnsw_heuristic_type::SAME_GRAPH_FOOTPRINT:
params.graph_degree = M * 2;
params.intermediate_graph_degree = M * 3;
params.graph_degree = M * 2;
params.intermediate_graph_degree = M * 3;
params.variable_graph_degree_fraction = 0.35;
break;
case hnsw_heuristic_type::SIMILAR_SEARCH_PERFORMANCE:
default:
params.graph_degree = 2 + M * 2 / 3;
params.intermediate_graph_degree = M + M * ef_construction / 256;
params.graph_degree = M;
params.intermediate_graph_degree = M + M * ef_construction / 256;
params.variable_graph_degree_fraction = 0.7;
break;
}
params.graph_build_params =
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/neighbors/cagra.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,11 @@ void optimize(
raft::resources const& res,
raft::mdspan<IdxT, raft::matrix_extent<int64_t>, raft::row_major, g_accessor> knn_graph,
raft::host_matrix_view<IdxT, int64_t, raft::row_major> new_graph,
const bool guarantee_connectivity = false)
const bool guarantee_connectivity = false,
const double variable_graph_degree_fraction = 1.0)
{
detail::optimize(res, knn_graph, new_graph, guarantee_connectivity);
detail::optimize(
res, knn_graph, new_graph, guarantee_connectivity, variable_graph_degree_fraction);
}

template <typename T,
Expand Down
35 changes: 29 additions & 6 deletions cpp/src/neighbors/detail/cagra/cagra_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,12 @@ void ace_adjust_sub_graph_ids(
size_t j = sub_search_graph(i, k);
size_t j_original;

if (j == kInvalidNeighbor<IdxT>) {
// Variable-degree padding / invalid-neighbor sentinel: propagate as-is.
search_graph(i_original, k) = kInvalidNeighbor<IdxT>;
continue;
}

if (j < core_sub_dataset_size) {
// core partition neighbor: local → core reordered → original
size_t j_reordered = j + core_partition_offsets(partition_id);
Expand Down Expand Up @@ -443,6 +449,11 @@ void ace_adjust_sub_graph_ids_disk(
for (size_t i = 0; i < core_sub_dataset_size; i++) {
for (size_t k = 0; k < graph_degree; k++) {
size_t j = sub_search_graph(i, k);
if (j == kInvalidNeighbor<IdxT>) {
// Variable-degree padding / invalid-neighbor sentinel: propagate as-is.
sub_search_graph(i, k) = kInvalidNeighbor<IdxT>;
continue;
}
if (j < core_sub_dataset_size) {
// core partition neighbor: local → core reordered
sub_search_graph(i, k) = j + core_partition_offsets(partition_id);
Expand Down Expand Up @@ -1930,7 +1941,8 @@ void optimize(
raft::resources const& res,
raft::mdspan<IdxT, raft::matrix_extent<int64_t>, raft::row_major, g_accessor> knn_graph,
raft::host_matrix_view<IdxT, int64_t, raft::row_major> new_graph,
const bool guarantee_connectivity = false)
const bool guarantee_connectivity = false,
const double variable_graph_degree_fraction = 1.0)
{
using internal_IdxT = typename std::make_unsigned<IdxT>::type;

Expand All @@ -1947,8 +1959,12 @@ void optimize(
knn_graph.extent(0),
knn_graph.extent(1));

cagra::detail::graph::optimize(
res, knn_graph_internal, new_graph_internal, guarantee_connectivity);
cagra::detail::graph::optimize(res,
knn_graph_internal,
new_graph_internal,
guarantee_connectivity,
true,
variable_graph_degree_fraction);
}

// RAII wrapper for allocating memory with Transparent HugePage
Expand Down Expand Up @@ -2168,8 +2184,11 @@ auto iterative_build_graph(
auto next_graph_size = curr_query_size;
cagra_graph = raft::make_host_matrix<IdxT, int64_t>(0, 0); // delete existing grahp
cagra_graph = raft::make_host_matrix<IdxT, int64_t>(next_graph_size, next_graph_degree);
optimize<IdxT>(
res, neighbors_view, cagra_graph.view(), flag_last ? params.guarantee_connectivity : 0);
optimize<IdxT>(res,
neighbors_view,
cagra_graph.view(),
flag_last ? params.guarantee_connectivity : false,
flag_last ? params.variable_graph_degree_fraction : 1.0);

auto end = std::chrono::high_resolution_clock::now();
auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
Expand Down Expand Up @@ -2289,7 +2308,11 @@ index<T, IdxT> build(
cagra_graph = raft::make_host_matrix<IdxT, int64_t>(dataset.extent(0), graph_degree);

RAFT_LOG_TRACE("optimizing graph");
optimize<IdxT>(res, knn_graph->view(), cagra_graph.view(), params.guarantee_connectivity);
optimize<IdxT>(res,
knn_graph->view(),
cagra_graph.view(),
params.guarantee_connectivity,
params.variable_graph_degree_fraction);

// free intermediate graph before trying to create the index
knn_graph.reset();
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/neighbors/detail/cagra/cagra_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ std::tuple<size_t, size_t, size_t, size_t> optimize_workspace_size(size_t n_rows

size_t prune_dev = n_rows * intermediate_degree * index_size; // d_input_graph
prune_dev += prune_dev_fixed;
// d_natural_degree (only allocated when variable_graph_degree_fraction < 1.0)
prune_dev += n_rows * sizeof(uint32_t);

// Reverse graph stage memory
size_t rev_dev = n_rows * graph_degree * index_size; // d_rev_graph
Expand Down
10 changes: 8 additions & 2 deletions cpp/src/neighbors/detail/cagra/cagra_serialize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,14 @@ void serialize_to_hnswlib(
size_t bytes_written = 0;
float GiB = 1 << 30;
for (std::size_t i = 0; i < index_.size(); i++) {
auto graph_degree = static_cast<int>(index_.graph_degree());
os.write(reinterpret_cast<char*>(&graph_degree), sizeof(int));
int actual_degree = static_cast<int>(index_.graph_degree());
for (int j = 0; j < actual_degree; j++) {
if (host_graph(i, j) == static_cast<IdxT>(-1)) {
actual_degree = j;
break;
}
}
os.write(reinterpret_cast<char*>(&actual_degree), sizeof(int));

IdxT* graph_row = &host_graph(i, 0);
os.write(reinterpret_cast<char*>(graph_row), sizeof(IdxT) * index_.graph_degree());
Expand Down
Loading
Loading