Skip to content

Commit c67e7c0

Browse files
committed
feat: add NUMA node awareness and pin threads accordingly
1 parent ca2c428 commit c67e7c0

1 file changed

Lines changed: 98 additions & 1 deletion

File tree

lib/linux/backend/iouring.c

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
#include <stdio.h>
12
#include <stdlib.h>
3+
#include <string.h>
4+
#include <sched.h>
25
#include <sys/utsname.h>
36
#include "hmll/hmll.h"
47
#include "hmll/memory.h"
@@ -14,6 +17,61 @@
1417
#include <driver_types.h>
1518
#endif
1619

20+
/* ── NUMA topology helpers ──────────────────────────────────────────── */
21+
22+
/**
 * Look up the NUMA node that a CUDA device is attached to.
 *
 * The device's PCI bus ID is queried from the CUDA runtime, its ASCII
 * letters are lowercased, and the integer stored in
 * /sys/bus/pci/devices/<bus-id>/numa_node is read back.
 *
 * @param device_idx CUDA device index handed to cudaDeviceGetPCIBusId().
 * @return The integer read from sysfs, or -1 when the bus ID query fails,
 *         the sysfs file cannot be opened, or no integer can be parsed.
 *         Always -1 when built without __HMLL_CUDA_ENABLED__.
 */
static int hmll_get_gpu_numa_node(const int device_idx)
{
#if !defined(__HMLL_CUDA_ENABLED__)
    /* No CUDA runtime available: there is no device to resolve. */
    (void)device_idx;
    return -1;
#else
    char bus_id[64] = {0};
    if (cudaDeviceGetPCIBusId(bus_id, sizeof(bus_id), device_idx) != cudaSuccess) {
        return -1;
    }

    /*
     * Fold ASCII 'A'..'Z' to lowercase so the ID matches the sysfs entry
     * name (the CUDA runtime is expected to report uppercase hex digits).
     */
    for (size_t i = 0; bus_id[i] != '\0'; i++) {
        if (bus_id[i] >= 'A' && bus_id[i] <= 'Z') {
            bus_id[i] = bus_id[i] + 32;
        }
    }

    char sysfs_path[256];
    snprintf(sysfs_path, sizeof(sysfs_path), "/sys/bus/pci/devices/%s/numa_node", bus_id);

    FILE *fp = fopen(sysfs_path, "r");
    if (fp == NULL) {
        return -1;
    }

    int parsed = -1;
    const int matched = fscanf(fp, "%d", &parsed);
    fclose(fp);

    /* Anything other than exactly one converted integer counts as failure. */
    return (matched == 1) ? parsed : -1;
#endif
}
53+
54+
/**
 * Find the first CPU listed for a NUMA node.
 *
 * Reads the leading integer of /sys/devices/system/node/node<N>/cpulist;
 * for a list such as "0-23,48-71" that is 0.
 *
 * @param numa_node NUMA node index; negative values are rejected without
 *                  touching the filesystem.
 * @return The first integer found in the node's cpulist, or -1 when
 *         numa_node is negative, the file cannot be opened, or no integer
 *         can be parsed from it.
 */
static int hmll_get_first_cpu_on_node(const int numa_node)
{
    int cpu = -1;

    if (numa_node >= 0) {
        char cpulist_path[256];
        snprintf(cpulist_path, sizeof(cpulist_path),
                 "/sys/devices/system/node/node%d/cpulist", numa_node);

        FILE *fp = fopen(cpulist_path, "r");
        if (fp != NULL) {
            /* Only the first number matters; the rest of the list is ignored. */
            if (fscanf(fp, "%d", &cpu) != 1) {
                cpu = -1;
            }
            fclose(fp);
        }
    }

    return cpu;
}
74+
1775
/* ── runtime kernel version detection ───────────────────────────────── */
1876
static inline unsigned hmll_kernel_version_internal(unsigned maj, unsigned min)
1977
{
@@ -80,6 +138,7 @@ static struct hmll_error hmll_io_uring_register_staging_buffers(
80138
}
81139

82140
unsigned char *arena = hmll_alloc(HMLL_URING_QUEUE_DEPTH * HMLL_URING_BUFFER_SIZE, device, HMLL_MEM_STAGING);
141+
83142
if (!arena) {
84143
ctx->error = HMLL_ERR(HMLL_ERR_ALLOCATION_FAILED);
85144
return ctx->error;
@@ -760,8 +819,19 @@ static struct hmll_error hmll_io_uring_queue_init(
760819
const struct hmll_device device
761820
) {
762821
(void)ctx;
822+
823+
/* Detect NUMA node for the target device and pin SQPOLL thread accordingly */
824+
int numa_node = -1;
825+
int sq_cpu = 0;
826+
827+
if (hmll_device_is_cuda(device)) {
828+
numa_node = hmll_get_gpu_numa_node(device.idx);
829+
int cpu = hmll_get_first_cpu_on_node(numa_node);
830+
if (cpu >= 0) sq_cpu = cpu;
831+
}
832+
763833
struct io_uring_params params = {
764-
.sq_thread_cpu = 0,
834+
.sq_thread_cpu = (unsigned)sq_cpu,
765835
.flags = hmll_io_uring_get_setup_flags(),
766836
.sq_thread_idle = 500
767837
};
@@ -773,6 +843,33 @@ static struct hmll_error hmll_io_uring_queue_init(
773843
return HMLL_ERR(HMLL_ERR_CUDA_SET_DEVICE_FAILED);
774844
}
775845

846+
/* Pin this thread to the GPU's NUMA node for optimal memory allocation */
847+
if (numa_node >= 0) {
848+
cpu_set_t cpuset;
849+
CPU_ZERO(&cpuset);
850+
char path[256];
851+
snprintf(path, sizeof(path), "/sys/devices/system/node/node%d/cpulist", numa_node);
852+
FILE *f = fopen(path, "r");
853+
if (f) {
854+
char buf[1024] = {0};
855+
if (fgets(buf, sizeof(buf), f)) {
856+
/* Parse cpulist format: "0-23,48-71" */
857+
char *tok = strtok(buf, ",\n");
858+
while (tok) {
859+
int lo, hi;
860+
if (sscanf(tok, "%d-%d", &lo, &hi) == 2) {
861+
for (int c = lo; c <= hi; c++) CPU_SET(c, &cpuset);
862+
} else if (sscanf(tok, "%d", &lo) == 1) {
863+
CPU_SET(lo, &cpuset);
864+
}
865+
tok = strtok(NULL, ",\n");
866+
}
867+
}
868+
fclose(f);
869+
sched_setaffinity(0, sizeof(cpuset), &cpuset);
870+
}
871+
}
872+
776873
struct hmll_io_uring_cuda_context *data = calloc(HMLL_URING_QUEUE_DEPTH, sizeof(struct hmll_io_uring_cuda_context));
777874
backend->device_ctx = (void *)data;
778875

0 commit comments

Comments
 (0)