diff --git a/cpp/src/neighbors/detail/cagra/cagra_helpers.cpp b/cpp/src/neighbors/detail/cagra/cagra_helpers.cpp index 3f79df47dd..818e4dcac4 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_helpers.cpp +++ b/cpp/src/neighbors/detail/cagra/cagra_helpers.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -84,7 +84,16 @@ std::tuple optimize_workspace_size(size_t n_rows } size_t combine_dev = combine_dev_fixed; - size_t total_host = mst_host + combine_host; + size_t debug_host_size = 0; + if (raft::default_logger().should_log(rapids_logger::level_enum::debug)) { + // cagra::detail::graph::optimize() allocates extra memory to calculate + // graph metrics when debug logging is enabled + debug_host_size = n_rows * graph_degree * sizeof(uint32_t) // host_copy_output_graph + n_rows * sizeof(uint32_t) // in_edge_count + graph_degree * sizeof(uint32_t); // hist + } + + size_t total_host = mst_host + combine_host + debug_host_size; size_t total_host_fixed = mst_host_fixed + combine_host_fixed; size_t total_dev = std::max(prune_dev, rev_dev + combine_dev); size_t total_dev_fixed = std::max(prune_dev_fixed, combine_dev_fixed); @@ -99,8 +108,7 @@ inline std::pair ivf_pq_build_mem_usage( cudaDataType_t dtype, cuvs::neighbors::graph_build_params::ivf_pq_params params, size_t graph_degree, - size_t intermediate_graph_degree, - bool guarantee_connectivity) + size_t intermediate_graph_degree) { size_t dtype_size = cuda_data_type_size(dtype); bool input_is_float = (dtype == CUDA_R_32F);
@@ -208,8 +216,7 @@ std::pair cagra_build_mem_usage(raft::resources const& res, dtype, pq_params, cparams.graph_degree, - cparams.intermediate_graph_degree, - cparams.guarantee_connectivity); + cparams.intermediate_graph_degree); } else if (std::holds_alternative( cparams.graph_build_params)) { RAFT_LOG_INFO("Considering CAGRA in memory build with NN-descent");