diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
index 96ff8344d3..44b011dd28 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
@@ -68,6 +68,19 @@ void check_graph_degree(size_t& intermediate_degree, size_t& graph_degree, size_
       intermediate_degree);
     graph_degree = intermediate_degree;
   }
+
+  // Recommended lower bound is ceil(1.5 * graph_degree), computed with integer arithmetic.
+  // Falling below it only triggers a warning; neither degree is modified here.
+  const size_t recommended_intermediate = graph_degree + ((graph_degree + 1) / 2);
+  if (intermediate_degree < recommended_intermediate) {
+    RAFT_LOG_WARN(
+      "CAGRA: intermediate_graph_degree (%lu) is below the recommended minimum (%lu, i.e. "
+      "1.5 * graph_degree). Pruning to graph_degree (%lu) may result in a disconnected graph"
+      " if the intermediate graph does not retain enough neighborhood information.",
+      intermediate_degree,
+      recommended_intermediate,
+      graph_degree);
+  }
 }
 
 // ACE: Get partition labels for partitioned approach
diff --git a/cpp/src/neighbors/detail/cagra/graph_core.cuh b/cpp/src/neighbors/detail/cagra/graph_core.cuh
index 76992cc7d5..f7d8780c69 100644
--- a/cpp/src/neighbors/detail/cagra/graph_core.cuh
+++ b/cpp/src/neighbors/detail/cagra/graph_core.cuh
@@ -1679,11 +1679,20 @@ void prune_graph_gpu(
   raft::copy(res, host_stats.view(), raft::make_const_mdspan(dev_stats.view()));
   raft::resource::sync_stream(res);
 
+  // ceil(1.5 * output_graph_degree); reported in the failure message below as the suggested
+  // minimum intermediate_graph_degree.
+  const size_t recommended_intermediate = output_graph_degree + ((output_graph_degree + 1) / 2);
   RAFT_EXPECTS(
     invalid_neighbor_list == 0,
-    "Could not generate an intermediate CAGRA graph because the initial kNN graph contains too "
-    "many invalid or duplicated neighbor nodes. This error can occur, for example, if too many "
-    "overflows occur during the norm computation between the dataset vectors.");
+    "CAGRA graph pruning failed: could not select graph_degree (%lu) distinct neighbors for "
+    "every node from the intermediate kNN graph (intermediate_graph_degree=%lu). This usually "
+    "means the intermediate graph does not encode enough neighborhood information for pruning — "
+    "for example when intermediate_graph_degree is too close to graph_degree. Set "
+    "intermediate_graph_degree >= %lu (1.5 * graph_degree). "
+    "Other causes include invalid or duplicate neighbor indices in the intermediate kNN graph.",
+    output_graph_degree,
+    knn_graph_degree,
+    recommended_intermediate);
 
   num_keep = host_stats.data_handle()[0];
   num_full = host_stats.data_handle()[1];