Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ BUILD_DIRS="${LIBCUVS_BUILD_DIR} ${PYTHON_BUILD_DIR} ${RUST_BUILD_DIR} ${JAVA_BU
# Set defaults for vars modified by flags to this script
CMAKE_LOG_LEVEL=""
VERBOSE_FLAG=""
BUILD_TESTS=ON
BUILD_TESTS=OFF
BUILD_MG_ALGOS=ON
BUILD_TYPE=Release
BUILD_TYPE=RelWithDebInfo
COMPILE_LIBRARY=OFF
INSTALL_TARGET=install
BUILD_REPORT_METRICS=""
Expand Down
2 changes: 1 addition & 1 deletion examples/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ function gpuArch {

# Set up build configuration
PARALLEL_LEVEL=${PARALLEL_LEVEL:=$(nproc)}
BUILD_TYPE=Release
BUILD_TYPE=RelWithDebInfo
CUVS_REPO_REL=""
EXTRA_CMAKE_ARGS=()

Expand Down
4 changes: 4 additions & 0 deletions examples/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ add_executable(IVF_FLAT_EXAMPLE src/ivf_flat_example.cu)
add_executable(IVF_PQ_EXAMPLE src/ivf_pq_example.cu)
add_executable(VAMANA_EXAMPLE src/vamana_example.cu)
add_executable(SCANN_EXAMPLE src/scann_example.cu)
add_executable(THROWS_RAFT_OMM_EXAMPLE src/throws_raft_oom.cu)

# `$<TARGET_NAME_IF_EXISTS:conda_env>` is a generator expression that ensures that targets are
# installed in a conda environment, if one exists
Expand All @@ -56,3 +57,6 @@ target_link_libraries(IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:
target_link_libraries(IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env>)
target_link_libraries(VAMANA_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env>)
target_link_libraries(SCANN_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env>)
target_link_libraries(
THROWS_RAFT_OMM_EXAMPLE PRIVATE cuvs::cuvs $<TARGET_NAME_IF_EXISTS:conda_env>
)
196 changes: 196 additions & 0 deletions examples/cpp/src/intercept_throw.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE // for dladdr / RTLD_NEXT
#endif
#include <backtrace.h> // DWARF-aware symbolization: to get function name and file:line
#include <cxxabi.h>
#include <dlfcn.h>
#include <link.h> // dl_iterate_phdr, struct dl_phdr_info
#include <new>
#include <pthread.h> // mutex guarding the cached backtrace state
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <typeinfo>
#include <unistd.h>

// Resolve the real __cxa_throw at library load time
// to avoid calling dlsym() during an OOM situation
typedef void (*cxa_throw_t)(void*, std::type_info*, void (*)(void*));

static cxa_throw_t real_cxa_throw = nullptr;

// libbacktrace state. It reads the DWARF info of the executable AND every
// shared object loaded into the process, so it can turn a raw PC into
// function + file:line — something backtrace_symbols/dladdr can never do,
// because they only read the dynamic symbol table (.dynsym).
//
// IMPORTANT libbacktrace limitation: a backtrace_state snapshots the set of
// loaded filenames (via dl_iterate_phdr) exactly once, on its first use, and
// never refreshes it. Any shared object loaded *after* that first use — e.g.
// libcuvs.so / librmm.so, dlopened or lazily bound after some earlier throw
// already triggered initialization — is permanently invisible to that state,
// so its PCs resolve to "?? ??:0" and we fall back to dladdr's .dynsym names
// (which omit -fvisibility=hidden internals, hence the bare "??" frames).
//
// Fix: detect when the loaded-filename set has changed and build a fresh state.
// This happens only a handful of times (each dlopen/lazy-load during startup),
// NOT per throw, so the common path stays a cheap cached-pointer read and the
// per-recreation cost (libbacktrace never frees old states) is bounded.
static pthread_mutex_t bt_lock = PTHREAD_MUTEX_INITIALIZER;
static struct backtrace_state* bt_state = nullptr;
static unsigned long bt_modsig = 0; // signature of the loaded-filename set

// dl_iterate_phdr callback: fold each loaded object into a cheap signature so
// we can tell when a library has been added/removed/relocated.
static int modsig_cb(struct dl_phdr_info* info, size_t /*sz*/, void* data)
{
unsigned long* acc = static_cast<unsigned long*>(data);
*acc = (*acc * 1000003UL) ^ static_cast<unsigned long>(info->dlpi_addr);
return 0;
}
static unsigned long current_modsig()
{
unsigned long acc = 1469598103934665603UL; // FNV-ish seed
dl_iterate_phdr(modsig_cb, &acc);
return acc;
}

// Return a backtrace_state whose filename list covers everything loaded *now*.
// libbacktrace offers no API to refresh a state, so when the filename set has
// changed we create a fresh one (and intentionally leak the old — libbacktrace
// states are never freed by design).
static struct backtrace_state* get_state()
{
unsigned long sig = current_modsig();
pthread_mutex_lock(&bt_lock);
if (!bt_state || sig != bt_modsig) {
// filename = nullptr -> uses /proc/self/exe, handles PIE/ASLR.
// threaded = 1 -> internal state is guarded for use from any thread.
bt_state = backtrace_create_state(nullptr,
/*threaded=*/1,
/*error_cb=*/nullptr,
/*data=*/nullptr);
bt_modsig = sig;
}
struct backtrace_state* s = bt_state;
pthread_mutex_unlock(&bt_lock);
return s;
}

// __attribute__((constructor)) runs when the .so is loaded, before any throws
__attribute__((constructor)) static void init()
{
// resolve the real __cxa_throw at library load time, so we never call
// dlsym() during an OOM because dlsym() itself can throw a bad_alloc.
real_cxa_throw = reinterpret_cast<cxa_throw_t>(dlsym(RTLD_NEXT, "__cxa_throw"));
}

// Emit one resolved frame, async-signal-ish: format into a stack buffer and
// write() directly to fd 2 (no stdio buffering). __cxa_demangle does allocate,
// which is acceptable here: the bad_alloc object is already constructed, so we
// are past the actual allocation failure point.
static void print_frame(
int idx, uintptr_t pc, const char* module, const char* func, const char* file, int line) noexcept
{
const char* name = func ? func : "??";

// Demangle Itanium C++ names (_Z...) -> human readable. Falls back to the
// raw name if it is not a mangled symbol or demangling fails.
char* demangled = nullptr;
if (func && func[0] == '_' && func[1] == 'Z') {
int status = 0;
demangled = abi::__cxa_demangle(func, nullptr, nullptr, &status);
if (status == 0 && demangled) { name = demangled; }
}

char buf[2048];
int n;
if (file) {
n = snprintf(buf,
sizeof buf,
" #%-2d 0x%012lx %s\n at %s:%d (%s)\n",
idx,
static_cast<unsigned long>(pc),
name,
file,
line,
module ? module : "??");
} else {
// No DWARF line info for this module (e.g. stripped libc/libstdc++,
// or the exe built without -g): show name + module + offset only.
n = snprintf(buf,
sizeof buf,
" #%-2d 0x%012lx %s\n in %s\n",
idx,
static_cast<unsigned long>(pc),
name,
module ? module : "??");
}
if (n > 0) {
size_t len = static_cast<size_t>(n) < sizeof buf ? static_cast<size_t>(n) : sizeof buf - 1;
write(STDERR_FILENO, buf, len);
}
free(demangled);
}

// Called once per frame (and once per inlined frame).
// function/file may be NULL and lineno 0 when DWARF is missing for that program counter (PC).
static int frame_callback(
void* data, uintptr_t pc, const char* file, int lineno, const char* function) noexcept
{
int* idx = static_cast<int*>(data);
// Always get the filename path; also use dladdr's .dynsym name as a fallback
// when libbacktrace found no function name (typical for libc/libstdc++
// internals that have neither DWARF nor a static symtab on this system).
const char* module = nullptr;
Dl_info info;
if (dladdr((void*)pc, &info)) {
module = info.dli_fname;
if (!function) {
function = info.dli_sname; // nearest exported dynamic symbol
}
}
print_frame((*idx)++, pc, module, function, file, lineno);
return 0; // 0 = continue walking the stack
}

// Called on a real failure inside libbacktrace (rare). Report and move on.
static void error_callback(void* /*data*/, const char* msg, int /*errnum*/)
{
const char header[] = "=== libbacktrace error ===\n";
write(STDERR_FILENO, header, strlen(header));
if (msg) {
write(STDERR_FILENO, msg, strlen(msg));
}
write(STDERR_FILENO, "\n", 1);
}

extern "C" void __cxa_throw(void* obj, std::type_info* tinfo, void (*dest)(void*))
{
const char header[] = "=== intercepted throw, backtrace ===\n";
write(STDERR_FILENO, header, strlen(header));

// Build/refresh the state against the filenames loaded *right now*, so PCs in
// libraries loaded after an earlier throw (libcuvs.so, librmm.so, ...) still
// resolve to function + file:line instead of dladdr's .dynsym-only "??".
struct backtrace_state* st = get_state();
if (st) {
int idx = 0;
// skip = 1 drops this __cxa_throw hook frame itself, so the trace
// starts at the code that actually threw.
backtrace_full(st, /*skip=*/1, frame_callback, error_callback, &idx);
} else {
const char msg[] = "(libbacktrace state unavailable)\n";
write(STDERR_FILENO, msg, strlen(msg));
}

// call the real __cxa_throw()
real_cxa_throw(obj, tinfo, dest);

__builtin_unreachable();
}
58 changes: 58 additions & 0 deletions examples/cpp/src/throws_raft_oom.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#include <cuvs/neighbors/cagra.hpp>
#include <raft/core/device_mdarray.hpp>
#include <raft/core/resources.hpp>
#include <raft/random/make_blobs.cuh>

#include <cuvs/neighbors/hnsw.hpp>
#include <cuvs/neighbors/ivf_pq.hpp>
#include <cuvs/util/host_memory.hpp>

#include <rmm/mr/pool_memory_resource.hpp>

#include <cstdio>
#include <cstdlib> // for exit
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

void throw_host_oom_level_1()
{
raft::resources res;
raft::resource::set_workspace_to_pool_resource(res, 2 * 1024 * 1024 * 1024ull);
int n_rows = 1024 * 1024 * 1024;
int n_cols = 3;
auto matrix = raft::make_host_matrix<float>(res, n_rows, n_cols);
}

void throw_host_oom() { throw_host_oom_level_1(); }

void throw_other_host_exception() { throw std::runtime_error("test exception"); }

void throw_device_oom_level_1()
{
raft::resources res;
raft::resource::set_workspace_to_pool_resource(res, 2 * 1024 * 1024 * 1024ull);
int n_rows = 1024 * 1024 * 1024;
int n_cols = 3;
auto matrix = raft::make_device_matrix<float>(res, n_rows, n_cols);
}

void throw_device_oom() { throw_device_oom_level_1(); }

// Usage:
// Conda build (requires backtrace library):
// g++ -O2 -g -fPIC -shared -I"$CONDA_PREFIX/include" -o src/intercept_throw.so src/intercept_throw.cpp "$CONDA_PREFIX/lib/libbacktrace.a" -lpthread -ldl
// Run:
// LD_PRELOAD=src/intercept_throw.so ./build/THROWS_RAFT_OMM_EXAMPLE
int main()
{
// throw_host_oom();
// throw_other_host_exception();
throw_device_oom();
return 0;
}
Loading