Skip to content
This repository was archived by the owner on Sep 23, 2024. It is now read-only.

Commit 76d5f0f

Browse files
committed
Use NUMA to allocate memory
(cherry picked from commit 96b9a59)
1 parent 3f31227 commit 76d5f0f

5 files changed

Lines changed: 44 additions & 22 deletions

File tree

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17)
1818
if(CPU_EXTENSIONS_ENABLE_LOG)
1919
target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_LOG)
2020
endif()
21+
target_link_libraries(${PROJECT_NAME} PUBLIC numa)
2122

2223
set(CMAKE_DST lib/cmake/${PROJECT_NAME})
2324
# header files

src/common/memory_alloc.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Copyright (C) 2018-2023 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
#include <cstdlib>
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include <stdint.h>
8+
#include <sched.h>
9+
#include <numa.h>
10+
#include "memory_alloc.hpp"
11+
12+
void* llmdnn_alloc(size_t aligned_size, size_t size, bool hint_numa) {
13+
if (hint_numa && numa_available() != -1) {
14+
int cur_cpu = sched_getcpu();
15+
auto cur_numa_node = numa_node_of_cpu(cur_cpu);
16+
return numa_alloc_onnode(size, cur_numa_node);
17+
} else {
18+
return aligned_alloc(aligned_size, size);
19+
}
20+
}
21+
22+
void llmdnn_free(void* p, size_t size, bool hint_numa) {
23+
if (hint_numa && numa_available() != -1) {
24+
numa_free(p, size);
25+
} else {
26+
::free(p);
27+
}
28+
}

src/common/memory_alloc.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Copyright (C) 2018-2023 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#pragma once

#include <memory.h>
#include <cstddef>  // size_t — <memory.h> is non-standard and not guaranteed to provide it

// Allocate `size` bytes. When `hint_numa` is true and NUMA is available the
// memory is bound to the calling thread's NUMA node; otherwise it comes from
// aligned_alloc with `aligned_size` alignment. Returns nullptr on failure.
void* llmdnn_alloc(size_t aligned_size, size_t size, bool hint_numa = true);

// Release memory returned by llmdnn_alloc. `size` and `hint_numa` must match
// the allocation call so the matching deallocator (numa_free vs ::free) is
// selected.
void llmdnn_free(void* p, size_t size, bool hint_numa = true);

src/common/tensor2d.hpp

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@
99
#include <iostream>
1010
#include <functional>
1111
#include <assert.h>
12-
#ifdef ENABLE_NUMA
13-
#include "numa.h"
14-
#endif
12+
#include "memory_alloc.hpp"
1513
#include "log.hpp"
1614
#include "bf16.hpp"
1715

@@ -30,7 +28,7 @@ struct tensor2D {
3028
tensor2D() = default;
3129
tensor2D(const tensor2D&) = delete;
3230
~tensor2D() {
33-
if (own && data) ::free(data);
31+
if (own && data) llmdnn_free(data, capacity);
3432
}
3533

3634
operator bool() {
@@ -104,22 +102,11 @@ struct tensor2D {
104102
if (capacity < need_capacity) {
105103
if (!is_const)
106104
need_capacity *= 2;
107-
capacity = need_capacity;
108105
// align begin address to cache line is vital, so tile load can
109106
// use all bandwidth (L1D/L2 only deliver data in unit of 64-byte aligned cache-line)
110-
111-
#ifdef ENABLE_NUMA
112-
if (USE_NUMA) {
113-
data = std::shared_ptr<T>(
114-
reinterpret_cast<T*>(numa_alloc_local(capacity)),
115-
[need_capacity](void * p){ numa_free(p, need_capacity); });
116-
} else {
117-
#else
118-
{
119-
#endif
120-
if (data) ::free(data);
121-
data = reinterpret_cast<T*>(aligned_alloc(64, capacity));
122-
}
107+
if (data) llmdnn_free(data, capacity);
108+
data = reinterpret_cast<T*>(llmdnn_alloc(64, need_capacity));
109+
capacity = need_capacity;
123110
if (is_const)
124111
memset(static_cast<void*>(data), 0, need_capacity);
125112
if (reinterpret_cast<uintptr_t>(data) % 64)

src/mm_kernel_common_amx.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@
1616
#include <x86intrin.h>
1717
#endif
1818

19-
#ifdef ENABLE_NUMA
20-
#include "numa.h"
21-
#endif
22-
2319
using namespace llmdnn;
2420

2521
namespace amx_kernel {

0 commit comments

Comments (0)