Skip to content

Commit cd27698

Browse files
committed
feat: add torch_npu_ops library for NPU backend support.
1 parent efb36d9 commit cd27698

17 files changed

+19
-1580
lines changed

.gitmodules

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,7 @@
2727
url = https://gitcode.com/xLLM-AI/spdlog.git
2828
[submodule "third_party/Mooncake"]
2929
path = third_party/Mooncake
30-
url = https://gitcode.com/xLLM-AI/Mooncake.git
30+
url = https://gitcode.com/xLLM-AI/Mooncake.git
31+
[submodule "third_party/torch_npu_ops"]
32+
path = third_party/torch_npu_ops
33+
url = https://gitcode.com/xLLM-AI/torch_npu_ops.git

third_party/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ add_subdirectory(etcd_cpp_apiv3)
1212
if(USE_NPU)
1313
add_subdirectory(spdlog)
1414
add_subdirectory(hccl_transfer/hccl_transfer)
15+
add_subdirectory(torch_npu_ops)
1516
endif()
1617
add_subdirectory(Mooncake)
1718

third_party/torch_npu_ops

Submodule torch_npu_ops added at 2bc8f57

xllm/core/kernels/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ cc_library(
2626
ops_api.cpp
2727
DEPS
2828
torch
29-
$<$<BOOL:${USE_NPU}>:npu_kernels>
29+
$<$<BOOL:${USE_NPU}>:torch_npu_kernels>
3030
$<$<BOOL:${USE_MLU}>:mlu_kernels>
3131
$<$<BOOL:${USE_CUDA}>:cuda_kernels>
3232
$<$<BOOL:${USE_ILU}>:ilu_kernels>
Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,3 @@
11
include(cc_library)
22

3-
add_subdirectory(xllm_ops)
4-
5-
file(GLOB_RECURSE XLLM_CORE_KERNELS_NPU_HEADER
6-
"${CMAKE_CURRENT_LIST_DIR}/custom_functions_npu/*.h"
7-
"${CMAKE_CURRENT_LIST_DIR}/ops_npu/*.h"
8-
"${CMAKE_CURRENT_LIST_DIR}/*.h"
9-
)
10-
11-
file(GLOB_RECURSE XLLM_CORE_KERNELS_NPU_SRCS
12-
"${CMAKE_CURRENT_LIST_DIR}/custom_functions_npu/*.cpp"
13-
"${CMAKE_CURRENT_LIST_DIR}/ops_npu/*.cpp"
14-
"${CMAKE_CURRENT_LIST_DIR}/*.cpp"
15-
)
16-
17-
cc_library(
18-
NAME
19-
npu_kernels
20-
HDRS
21-
${XLLM_CORE_KERNELS_NPU_HEADER}
22-
SRCS
23-
${XLLM_CORE_KERNELS_NPU_SRCS}
24-
DEPS
25-
:model_context
26-
glog::glog
27-
torch
28-
torch_npu
29-
)
3+
add_subdirectory(xllm_ops)

xllm/core/kernels/npu/attention.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ void reshape_paged_cache(torch::Tensor& key,
2222
torch::Tensor& k_cache,
2323
std::optional<torch::Tensor>& v_cache,
2424
const torch::Tensor& slot_mapping) {
25-
atb::_npu_reshape_and_cache(
25+
atb::npu_reshape_and_cache(
2626
key, value.value(), k_cache, v_cache.value(), slot_mapping);
2727
}
2828

@@ -35,7 +35,7 @@ void batch_prefill(const torch::Tensor& query,
3535
torch::Tensor& output) {
3636
int64_t num_heads = query.size(-2);
3737
int64_t num_kv_heads = key.size(-2);
38-
atb::_npu_flash_attention(
38+
atb::npu_flash_attention(
3939
query, key, value, mask, seq_len, scale, num_heads, num_kv_heads, output);
4040
}
4141

@@ -51,15 +51,15 @@ void batch_decode(const torch::Tensor& query,
5151
int64_t num_kv_heads = k_cache.size(-2);
5252
auto q = query.view({-1, num_heads, head_size});
5353
auto o = output.view({-1, num_heads, head_size});
54-
atb::_npu_paged_attention(q,
55-
k_cache,
56-
v_cache,
57-
num_kv_heads,
58-
num_heads,
59-
scale,
60-
block_table,
61-
seq_lens,
62-
o);
54+
atb::npu_paged_attention(q,
55+
k_cache,
56+
v_cache,
57+
num_kv_heads,
58+
num_heads,
59+
scale,
60+
block_table,
61+
seq_lens,
62+
o);
6363
}
6464

6565
} // namespace xllm::kernel::npu

xllm/core/kernels/npu/custom_functions_npu/atb_common.cpp

Lines changed: 0 additions & 174 deletions
This file was deleted.

0 commit comments

Comments (0)