Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,40 @@ else
warning('CUDA not found, GPUNETIO plugin will be disabled')
doca_gpunetio_dep = disabler()
endif
# SynapseAI (Habana Gaudi) dependency detection.
# NOTE: doca_gpunetio_dep is intentionally NOT re-resolved here — it is already
# set by the CUDA check above (disabler() when CUDA is missing), and an
# unconditional dependency() lookup would silently override that guard.

# SynapseAI support ideally uses both libSynapse and hl-thunk; fall back to
# hl-thunk alone when libSynapse is unavailable.
synapse_lib = cpp.find_library('Synapse',
                               dirs: ['/usr/lib/habanalabs', '/usr/local/lib/habanalabs'],
                               required: false)
hlthunk_lib = cpp.find_library('hl-thunk',
                               dirs: ['/usr/lib/habanalabs', '/usr/local/lib/habanalabs'],
                               required: false)

synapseai_dep = dependency('', required: false) # placeholder: "not found"
if synapse_lib.found() and hlthunk_lib.found()
    synapseai_dep = declare_dependency(dependencies: [synapse_lib, hlthunk_lib])
elif hlthunk_lib.found()
    # Fallback to just hl-thunk if libSynapse is not available
    synapseai_dep = hlthunk_lib
endif

if synapseai_dep.found()
    # Wrap the library dependency with the habanalabs/DRM header search paths.
    # compile_args is used instead of include_directories() because these are
    # absolute system paths outside the source tree.
    synapseai_dep = declare_dependency(
        dependencies: synapseai_dep,
        compile_args: ['-I/usr/include/habanalabs', '-I/usr/include/drm'],
    )
    message('Found SynapseAI support for Habana Gaudi devices')
else
    warning('SynapseAI not found. Habana Gaudi device support will be disabled.')
endif

# Check for etcd-cpp-api - use multiple methods for discovery
etcd_dep = dependency('etcd-cpp-api', required : false)
Expand Down
1 change: 1 addition & 0 deletions src/plugins/libfabric/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ EFA Specific **Topology-Aware Optimization**: Hardware-aware GPU-to-EFA and NUMA
- **Libfabric**
- Many systems will already have libfabric installed. If not, a custom libfabric installation is available via https://ofiwg.github.io/libfabric/ - Minimum required version: v1.21.0
- For EFA-enabled AWS instances, it is recommended to install through the AWS EFA installer: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-start.html - We recommend using the latest version
- **Note:** HMEM support for some GPU implementations (SynapseAI, etc.) requires libfabric v1.16.x or newer

- **hwloc**
- hwloc is used to understand the underlying architecture to optimize application performance. Suggested version: 2.10.0 or newer
Expand Down
80 changes: 72 additions & 8 deletions src/plugins/libfabric/libfabric_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,21 @@ nixlLibfabricEngine::nixlLibfabricEngine(const nixlBackendInitParams *init_param
NIXL_DEBUG << "Using default striping threshold: " << striping_threshold_ << " bytes";
}

// Parse default HMEM interface parameter
// Auto-detect from topology if not specified
std::string hmem_iface_str;
if (getInitParam("default_hmem_iface", hmem_iface_str) == NIXL_SUCCESS) {
default_hmem_iface_ = hmem_iface_str;
NIXL_DEBUG << "Using custom default HMEM interface from backend params: " << default_hmem_iface_;
} else {
// Auto-detect device type from topology
// Note: topology discovery happens in rail_manager constructor
// For now, leave empty to use GDR fallback by default
// SynapseAI will be auto-detected per-registration via /dev/accel check
default_hmem_iface_ = "";
NIXL_DEBUG << "No default HMEM interface specified, will auto-detect per-registration";
}

// Initialize Rail Manager which will discover the topology and create all rails.
try {
NIXL_DEBUG << "Rail Manager created with " << rail_manager.getNumDataRails()
Expand Down Expand Up @@ -718,7 +733,7 @@ nixl_mem_list_t
nixlLibfabricEngine::getSupportedMems() const {
nixl_mem_list_t mems;
mems.push_back(DRAM_SEG);
#ifdef HAVE_CUDA
#if defined(HAVE_CUDA) || defined(HAVE_SYNAPSEAI)
mems.push_back(VRAM_SEG);
#endif
return mems;
Expand All @@ -734,9 +749,10 @@ nixlLibfabricEngine::registerMem(const nixlBlobDesc &mem,
priv->length_ = mem.len;
priv->gpu_device_id_ = mem.devId; // Store GPU device ID

#ifdef HAVE_CUDA
// Handle CUDA memory registration with GPU Direct RDMA support
if (nixl_mem == VRAM_SEG) {
#ifdef HAVE_CUDA
// Handle CUDA memory registration with GPU Direct RDMA support

// For multi-GPU support, skip CUDA address workaround
if (cuda_addr_wa_) {
bool need_restart;
Expand All @@ -760,28 +776,76 @@ nixlLibfabricEngine::registerMem(const nixlBlobDesc &mem,
}
NIXL_DEBUG << "Set CUDA device context to GPU " << mem.devId;
}
}
#endif

#ifdef HAVE_SYNAPSEAI
// Handle SynapseAI memory registration
NIXL_DEBUG << "Registering SynapseAI device memory for device " << mem.devId;
// SynapseAI-specific setup would go here if needed
#endif
}

// Initialize vectors to accommodate all possible rails (for indexing consistency)
priv->rail_mr_list_.resize(rail_manager.getNumDataRails(), nullptr);
priv->rail_key_list_.resize(rail_manager.getNumDataRails(), 0);

#ifdef HAVE_CUDA
// Set CUDA context before libfabric operations for VRAM
if (nixl_mem == VRAM_SEG) {
#ifdef HAVE_CUDA
// Set CUDA context before libfabric operations for VRAM
vramApplyCtx();
}
#endif
#ifdef HAVE_SYNAPSEAI
// SynapseAI context application would go here if needed
#endif
}

// Determine HMEM interface hint based on priority:
// 1. Environment variables (highest priority)
// 2. Per-registration hints via metaInfo blob
// 3. Backend-wide defaults from custom params
// 4. Auto-detection (fallback - empty string)
std::string hmem_hint;

// Priority 1: Check environment variables
const char* env_hmem = getenv("HMEM_IFACE");
if (env_hmem && env_hmem[0] != '\0') {
hmem_hint = env_hmem;
NIXL_DEBUG << "Using HMEM interface from environment variable: " << hmem_hint;
}
// Priority 2: Check per-registration hint from metaInfo
else if (!mem.metaInfo.empty()) {
hmem_hint = std::string(mem.metaInfo.begin(), mem.metaInfo.end());
NIXL_DEBUG << "Using HMEM interface from metaInfo hint: " << hmem_hint;
}
// Priority 3: Use backend-wide default
else if (!default_hmem_iface_.empty()) {
hmem_hint = default_hmem_iface_;
NIXL_DEBUG << "Using HMEM interface from backend default: " << hmem_hint;
}
// Priority 4: Auto-detect from system topology
else {
// Auto-detect device type based on topology discovery
// Intel HPU requires FI_HMEM_SYNAPSEAI (no GDR support exists)
// NVIDIA GPU can use GDR fallback (empty hint)
if (nixl_mem == VRAM_SEG && rail_manager.getNumIntelHpus() > 0) {
hmem_hint = "SYNAPSEAI";
NIXL_DEBUG << "Auto-detected Intel HPU system, using HMEM interface: SYNAPSEAI";
} else {
// Leave empty for GDR fallback (CUDA) or DRAM
NIXL_DEBUG << "Auto-detection: using GDR fallback (empty hint)";
}
}

// Use Rail Manager for centralized memory registration with GPU Direct RDMA support
NIXL_TRACE << "Registering memory: addr=" << (void *)mem.addr << " len=" << mem.len
<< " mem_type=" << nixl_mem << " devId=" << mem.devId;
<< " mem_type=" << nixl_mem << " devId=" << mem.devId
<< " hmem_hint=" << (hmem_hint.empty() ? "auto" : hmem_hint);

nixl_status_t status = rail_manager.registerMemory((void *)mem.addr,
mem.len,
nixl_mem,
mem.devId,
hmem_hint,
priv->rail_mr_list_,
priv->rail_key_list_,
priv->selected_rails_);
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/libfabric/libfabric_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ class nixlLibfabricEngine : public nixlBackendEngine {

mutable size_t total_transfer_size_;

// HMEM interface management
std::string default_hmem_iface_; // Backend-wide default HMEM interface from custom params (default: empty — auto-detect per registration)

// Map of agent name to connection info
// <remoteAgent, <connection>>
mutable std::unordered_map<std::string, std::shared_ptr<nixlLibfabricConnection>> connections_;
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/libfabric/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ if cuda_dep.found()
compile_flags += ['-DHAVE_CUDA']
endif

# Add SynapseAI support if available (synapseai_dep is resolved in the
# top-level meson.build; HAVE_SYNAPSEAI gates the VRAM_SEG/HPU code paths
# in libfabric_backend.cpp)
if synapseai_dep.found()
    libfabric_plugin_deps += [synapseai_dep]
    compile_flags += ['-DHAVE_SYNAPSEAI']
endif

# Build as static or shared library based on configuration
if 'LIBFABRIC' in static_plugins
libfabric_backend_lib = static_library(
Expand Down
116 changes: 110 additions & 6 deletions src/utils/libfabric/libfabric_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,87 @@

namespace LibfabricUtils {

// Provider-specific configurations.
// Fields are positional per ProviderConfig:
//   {name, caps, mode, mr_mode, resource_mgmt, threading}
// Sentinels: mr_mode == 0, resource_mgmt == FI_RM_UNSPEC and
// threading == FI_THREAD_UNSPEC mean "leave the provider's default"
// (see configureHintsForProvider, which skips those fields).
static const ProviderConfig PROVIDER_CONFIGS[] = {
    {"efa",
     FI_MSG | FI_RMA | FI_LOCAL_COMM | FI_REMOTE_COMM,
     FI_CONTEXT | FI_CONTEXT2,
     0, // let provider choose
     FI_RM_UNSPEC,
     FI_THREAD_SAFE},
    {"verbs", // Matches both "verbs" and "verbs;ofi_rxm"
     FI_MSG | FI_RMA | FI_READ | FI_WRITE | FI_RECV | FI_SEND | FI_REMOTE_READ | FI_REMOTE_WRITE |
         FI_MULTI_RECV | FI_LOCAL_COMM | FI_REMOTE_COMM | FI_HMEM,
     0, // no mode flags required
     FI_MR_LOCAL | FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_HMEM,
     FI_RM_ENABLED,
     FI_THREAD_SAFE},
    {"tcp",
     FI_MSG | FI_RMA | FI_LOCAL_COMM | FI_REMOTE_COMM,
     FI_CONTEXT | FI_CONTEXT2,
     0, // basic MR mode, overridden in rail.cpp
     FI_RM_UNSPEC,
     FI_THREAD_UNSPEC},
    {
        "sockets",
        FI_MSG | FI_RMA | FI_LOCAL_COMM | FI_REMOTE_COMM,
        0,
        0, // let provider choose
        FI_RM_UNSPEC,
        FI_THREAD_UNSPEC // default threading
    }};

// Number of entries in the table above (classic sizeof idiom).
static const size_t NUM_PROVIDER_CONFIGS = sizeof(PROVIDER_CONFIGS) / sizeof(PROVIDER_CONFIGS[0]);

void
configureHintsForProvider(struct fi_info *hints, const std::string &provider_name) {
const ProviderConfig *config = nullptr;

// Find matching config
// Match order: 1) exact match, 2) prefix match for composite providers (e.g., "verbs;ofi_rxm")
for (size_t i = 0; i < NUM_PROVIDER_CONFIGS; ++i) {
const std::string &config_name = PROVIDER_CONFIGS[i].name;

// Exact match
if (provider_name == config_name) {
config = &PROVIDER_CONFIGS[i];
break;
}

// Composite provider match (e.g., "verbs;ofi_rxm" matches "verbs")
// Check if provider_name starts with config_name followed by ";"
if (provider_name.rfind(config_name + ";", 0) == 0) {
config = &PROVIDER_CONFIGS[i];
break;
}
}

if (!config) {
// Default configuration
NIXL_DEBUG << "No specific config for provider '" << provider_name << "', using defaults";
hints->caps = FI_MSG | FI_RMA | FI_LOCAL_COMM | FI_REMOTE_COMM;
hints->mode = 0;
hints->ep_attr->type = FI_EP_RDM;
return;
}

// Apply provider-specific configuration
hints->caps = config->caps;
hints->mode = config->mode;
hints->ep_attr->type = FI_EP_RDM;

if (config->resource_mgmt != FI_RM_UNSPEC) {
hints->domain_attr->resource_mgmt = config->resource_mgmt;
}

if (config->mr_mode != 0) {
hints->domain_attr->mr_mode = config->mr_mode;
}

if (config->threading != FI_THREAD_UNSPEC) {
hints->domain_attr->threading = config->threading;
}
}

std::pair<std::string, std::vector<std::string>>
getAvailableNetworkDevices() {
Expand All @@ -43,16 +124,24 @@ getAvailableNetworkDevices() {
return {"none", {}};
}

hints->caps = 0;
hints->caps = FI_MSG | FI_RMA; // Basic messaging and RMA
// Check if FI_PROVIDER environment variable is set
const char *env_provider = getenv("FI_PROVIDER");
std::string provider = env_provider && env_provider[0] != '\0' ? env_provider : "";

hints->caps |= FI_LOCAL_COMM | FI_REMOTE_COMM;
hints->mode = FI_CONTEXT;
hints->ep_attr->type = FI_EP_RDM;
if (!provider.empty()) {
hints->fabric_attr->prov_name = strdup(env_provider);
NIXL_INFO << "Using provider from FI_PROVIDER environment: " << env_provider;
// Configure hints based on provider
configureHintsForProvider(hints, provider);
} else {
// Auto-detect: start with default configuration
configureHintsForProvider(hints, "");
}

// Use FI_VERSION(1, 18) for DMABUF and HMEM support
int ret = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, hints, &info);
if (ret) {
NIXL_ERROR << "fi_getinfo failed " << fi_strerror(-ret);
NIXL_ERROR << "fi_getinfo failed: " << fi_strerror(-ret);
fi_freeinfo(hints);
return {"none", {}};
}
Expand Down Expand Up @@ -85,8 +174,23 @@ getAvailableNetworkDevices() {
}
}

// Provider selection priority:
// 1. EFA (AWS Elastic Fabric Adapter)
// 2. verbs;ofi_rxm (explicit verbs with RXM)
// 3. verbs (plain verbs)
// 4. sockets (TCP fallback)

if (provider_device_map.find("efa") != provider_device_map.end()) {
return {"efa", provider_device_map["efa"]};
} else if (provider_device_map.find("verbs;ofi_rxm") != provider_device_map.end()) {
// Explicit verbs with RXM
NIXL_INFO << "Using verbs with RXM for RDM endpoint support";
return {"verbs;ofi_rxm", provider_device_map["verbs;ofi_rxm"]};
} else if (provider_device_map.find("verbs") != provider_device_map.end()) {
// Plain verbs - might not support RDM, but try it
NIXL_WARN << "Using plain verbs provider - may not support RDM endpoints. "
<< "Consider setting FI_PROVIDER=verbs;ofi_rxm for RDM support";
return {"verbs", provider_device_map["verbs"]};
} else if (provider_device_map.find("sockets") != provider_device_map.end()) {
return {"sockets", {provider_device_map["sockets"][0]}};
}
Expand Down
14 changes: 14 additions & 0 deletions src/utils/libfabric/libfabric_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <rdma/fi_endpoint.h>
#include <rdma/fi_cm.h>
#include <rdma/fi_rma.h>
#include <rdma/fi_ext.h>

// Libfabric configuration constants
#define NIXL_LIBFABRIC_DEFAULT_CONTROL_RAILS 1
Expand Down Expand Up @@ -142,6 +143,16 @@ struct BinaryNotification {
}
};

// Provider configuration structure.
// One entry per libfabric provider; consumed positionally by the
// PROVIDER_CONFIGS table in libfabric_common.cpp and applied to fi_info
// hints by configureHintsForProvider().
struct ProviderConfig {
    std::string name; // Provider name, e.g. "efa"; also matches composite "<name>;..." forms
    uint64_t caps; // Capability bits for hints->caps (FI_MSG, FI_RMA, ...)
    uint64_t mode; // Mode bits for hints->mode (FI_CONTEXT, ...); 0 = none required
    uint64_t mr_mode; // domain_attr->mr_mode bits; 0 = leave provider default
    fi_resource_mgmt resource_mgmt; // FI_RM_UNSPEC = leave provider default
    fi_threading threading; // FI_THREAD_UNSPEC = leave provider default
};

// Global XFER_ID management
namespace LibfabricUtils {
// Get next unique XFER_ID
Expand All @@ -163,6 +174,9 @@ getAvailableNetworkDevices();
// String utilities
std::string
hexdump(const void *data);
// Provider configuration helper
void
configureHintsForProvider(struct fi_info* hints, const std::string& provider_name);
} // namespace LibfabricUtils

#endif // NIXL_SRC_UTILS_LIBFABRIC_LIBFABRIC_COMMON_H
Loading
Loading