Skip to content

Commit cd27698

Browse files
committed
feat: add torch_npu_ops library for NPU backend support.
1 parent efb36d9 commit cd27698

17 files changed

+19
-1580
lines changed

.gitmodules

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,7 @@
2727
url = https://gitcode.com/xLLM-AI/spdlog.git
2828
[submodule "third_party/Mooncake"]
2929
path = third_party/Mooncake
30-
url = https://gitcode.com/xLLM-AI/Mooncake.git
30+
url = https://gitcode.com/xLLM-AI/Mooncake.git
31+
[submodule "third_party/torch_npu_ops"]
32+
path = third_party/torch_npu_ops
33+
url = https://gitcode.com/xLLM-AI/torch_npu_ops.git

third_party/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ add_subdirectory(etcd_cpp_apiv3)
1212
if(USE_NPU)
1313
add_subdirectory(spdlog)
1414
add_subdirectory(hccl_transfer/hccl_transfer)
15+
add_subdirectory(torch_npu_ops)
1516
endif()
1617
add_subdirectory(Mooncake)
1718

third_party/torch_npu_ops

Submodule torch_npu_ops added at 2bc8f57

xllm/core/kernels/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ cc_library(
2626
ops_api.cpp
2727
DEPS
2828
torch
29-
$<$<BOOL:${USE_NPU}>:npu_kernels>
29+
$<$<BOOL:${USE_NPU}>:torch_npu_kernels>
3030
$<$<BOOL:${USE_MLU}>:mlu_kernels>
3131
$<$<BOOL:${USE_CUDA}>:cuda_kernels>
3232
$<$<BOOL:${USE_ILU}>:ilu_kernels>
Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,3 @@
11
include(cc_library)
22

3-
add_subdirectory(xllm_ops)
4-
5-
file(GLOB_RECURSE XLLM_CORE_KERNELS_NPU_HEADER
6-
"${CMAKE_CURRENT_LIST_DIR}/custom_functions_npu/*.h"
7-
"${CMAKE_CURRENT_LIST_DIR}/ops_npu/*.h"
8-
"${CMAKE_CURRENT_LIST_DIR}/*.h"
9-
)
10-
11-
file(GLOB_RECURSE XLLM_CORE_KERNELS_NPU_SRCS
12-
"${CMAKE_CURRENT_LIST_DIR}/custom_functions_npu/*.cpp"
13-
"${CMAKE_CURRENT_LIST_DIR}/ops_npu/*.cpp"
14-
"${CMAKE_CURRENT_LIST_DIR}/*.cpp"
15-
)
16-
17-
cc_library(
18-
NAME
19-
npu_kernels
20-
HDRS
21-
${XLLM_CORE_KERNELS_NPU_HEADER}
22-
SRCS
23-
${XLLM_CORE_KERNELS_NPU_SRCS}
24-
DEPS
25-
:model_context
26-
glog::glog
27-
torch
28-
torch_npu
29-
)
3+
add_subdirectory(xllm_ops)

xllm/core/kernels/npu/attention.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ void reshape_paged_cache(torch::Tensor& key,
2222
torch::Tensor& k_cache,
2323
std::optional<torch::Tensor>& v_cache,
2424
const torch::Tensor& slot_mapping) {
25-
atb::_npu_reshape_and_cache(
25+
atb::npu_reshape_and_cache(
2626
key, value.value(), k_cache, v_cache.value(), slot_mapping);
2727
}
2828

@@ -35,7 +35,7 @@ void batch_prefill(const torch::Tensor& query,
3535
torch::Tensor& output) {
3636
int64_t num_heads = query.size(-2);
3737
int64_t num_kv_heads = key.size(-2);
38-
atb::_npu_flash_attention(
38+
atb::npu_flash_attention(
3939
query, key, value, mask, seq_len, scale, num_heads, num_kv_heads, output);
4040
}
4141

@@ -51,15 +51,15 @@ void batch_decode(const torch::Tensor& query,
5151
int64_t num_kv_heads = k_cache.size(-2);
5252
auto q = query.view({-1, num_heads, head_size});
5353
auto o = output.view({-1, num_heads, head_size});
54-
atb::_npu_paged_attention(q,
55-
k_cache,
56-
v_cache,
57-
num_kv_heads,
58-
num_heads,
59-
scale,
60-
block_table,
61-
seq_lens,
62-
o);
54+
atb::npu_paged_attention(q,
55+
k_cache,
56+
v_cache,
57+
num_kv_heads,
58+
num_heads,
59+
scale,
60+
block_table,
61+
seq_lens,
62+
o);
6363
}
6464

6565
} // namespace xllm::kernel::npu

xllm/core/kernels/npu/custom_functions_npu/atb_common.cpp

Lines changed: 0 additions & 174 deletions
This file was deleted.

0 commit comments

Comments (0)