Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
609b0f3
prune kernel smem
mfoerste4 Feb 16, 2026
a320e0e
reduce copies within reverse graph compute
mfoerste4 Feb 18, 2026
6d1a618
optimize() draft move more compute to GPU
mfoerste4 Feb 19, 2026
77ab079
Merge branch 'rapidsai:main' into cagra_optimize
mfoerste4 Feb 19, 2026
008e0fb
Merge branch 'rapidsai:main' into cagra_optimize
mfoerste4 Feb 20, 2026
822faea
some fixes, cleanup
mfoerste4 Feb 20, 2026
8ed1497
Merge branch 'main' into cagra_optimize
mfoerste4 Feb 24, 2026
9b1f741
some fixes
mfoerste4 Feb 25, 2026
ecf3b1d
extract prune into separate function
mfoerste4 Feb 27, 2026
972d278
extract optimize components
mfoerste4 Mar 2, 2026
5e9ebc5
enable both host/device inout graphs for optimize
mfoerste4 Mar 2, 2026
8f24d9d
resolve conflicts
mfoerste4 Mar 2, 2026
40977e2
smaller fixes
mfoerste4 Mar 2, 2026
14e9f3e
bugfix
mfoerste4 Mar 3, 2026
416558d
fuse and simplify pruning, remove CPU path
mfoerste4 Mar 5, 2026
d8d8bd8
cleanup merge, remove CPU path
mfoerste4 Mar 5, 2026
00c4204
batch reverse creation
mfoerste4 Mar 6, 2026
9e63a7c
add prefetch view to handle managed & host
mfoerste4 Mar 6, 2026
a38ad52
fix batched iterator
mfoerste4 Mar 9, 2026
89b0d1c
implement fallback / simplify strategy
mfoerste4 Mar 9, 2026
d0e3dae
add logging / remove stats compute
mfoerste4 Mar 10, 2026
ec45fd2
add test, persist stream pool, cleanup
mfoerste4 Mar 10, 2026
e43b51b
Merge branch 'main' into cagra_optimize
mfoerste4 Mar 10, 2026
c412138
switch to cooperative groups as __reduce_min_sync causes issues
mfoerste4 Mar 11, 2026
b035ea0
Merge branch 'cagra_optimize' of github.com:mfoerste4/cuvs into cagra…
mfoerste4 Mar 11, 2026
ab01bab
back to column wise reverse graph creation to boost closer connections
mfoerste4 Mar 13, 2026
139774f
Merge branch 'main' into cagra_optimize
mfoerste4 Mar 13, 2026
68f7883
fix signedness
mfoerste4 Mar 13, 2026
add206a
stupid me trusting cursor to fix this
mfoerste4 Mar 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions cpp/src/neighbors/detail/cagra/cagra_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -833,27 +833,26 @@ inline std::pair<size_t, size_t> optimize_workspace_size(size_t n_rows,

// Prune stage memory
// We neglect 8 bytes (both on host and device) for stats
size_t prune_host = n_rows * intermediate_degree * sizeof(uint8_t); // detour count
size_t batch_size = std::min(static_cast<size_t>(256 * 1024), n_rows);

size_t prune_dev = n_rows * intermediate_degree * 1; // detour count (uint8_t)
prune_dev += n_rows * sizeof(uint32_t); // d_num_detour_edges
prune_dev += n_rows * intermediate_degree * index_size; // d_input_graph
size_t prune_dev = batch_size * intermediate_degree * 1; // detour count (uint8_t)
prune_dev += batch_size * sizeof(uint32_t); // d_num_detour_edges
prune_dev += n_rows * intermediate_degree * index_size; // d_input_graph

// Reverse graph stage memory
size_t rev_host = n_rows * graph_degree * index_size; // rev_graph
rev_host += n_rows * sizeof(uint32_t); // rev_graph_count
rev_host += n_rows * index_size; // dest_nodes

size_t rev_dev = n_rows * graph_degree * index_size; // d_rev_graph
rev_dev += n_rows * sizeof(uint32_t); // d_rev_graph_count
rev_dev += n_rows * sizeof(uint32_t); // d_dest_nodes

// Memory for merging graphs (host only)
// Memory for merging graphs (host only optional)
size_t combine_host =
n_rows * sizeof(uint32_t) + graph_degree * sizeof(uint32_t); // in_edge_count + hist

size_t total_host = mst_host + std::max({prune_host, rev_host, combine_host});
size_t total_dev = std::max(prune_dev, rev_dev);
// additional memory for combine stage on device
size_t combine_dev = n_rows * graph_degree * index_size; // d_output_graph

size_t total_host = mst_host + combine_host;
size_t total_dev = std::max(prune_dev, rev_dev + combine_dev);

return std::make_pair(total_host, total_dev);
}
Expand Down
Loading
Loading