Skip to content

Commit 1b00746

Browse files
author
zuolve.lai
committed
feat: support iluvatar backend qwen3 0.6b run through
1 parent 257c867 commit 1b00746

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+1359
-44
lines changed

CMakeLists.txt

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON)
33

44
option(USE_NPU "Enable NPU support" OFF)
55
option(USE_MLU "Enable MLU support" OFF)
6+
option(USE_ILU "Enable ILU support" OFF)
67
option(USE_CUDA "Enable CUDA support" OFF)
78
add_compile_definitions(YLT_ENABLE_IBV)
89
add_definitions(-DYLT_ENABLE_IBV)
@@ -105,7 +106,7 @@ set(CMAKE_CXX_STANDARD 20)
105106
set(CMAKE_CXX_STANDARD_REQUIRED ON)
106107
set(CMAKE_CXX_EXTENSIONS ON)
107108

108-
if(USE_NPU OR USE_CUDA)
109+
if(USE_NPU OR USE_CUDA OR USE_ILU)
109110
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
110111
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
111112
elseif(USE_MLU)
@@ -208,6 +209,19 @@ if(USE_CUDA)
208209
message(STATUS "TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}")
209210
endif()
210211

212+
# Global build settings applied when the ILU backend is enabled.
if(USE_ILU)
  # Put the project's own cmake/Modules directory ahead of any existing
  # module search path entries.
  set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}")
  set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
  # NOTE(review): "ivcore11" is hard-coded and overrides any value supplied on
  # the command line; confirm this is the only architecture to support.
  set(CMAKE_CUDA_ARCHITECTURES "ivcore11")
  # NOTE(review): WARNINGS_AS_ERRORS is presumably consumed elsewhere in the
  # project; its reader is not visible here.
  set(WARNINGS_AS_ERRORS OFF)
  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    # These are compiler warning switches, not preprocessor definitions, so
    # they belong in add_compile_options() rather than add_definitions().
    add_compile_options(
      -Wno-c++11-narrowing
      -Wno-thread-safety-analysis
    )
  endif()
endif()
224+
211225
# configure vcpkg
212226
# have to set CMAKE_TOOLCHAIN_FILE before first project call.
213227
# if (DEFINED ENV{VCPKG_ROOT} AND NOT DEFINED CMAKE_TOOLCHAIN_FILE)
@@ -424,6 +438,23 @@ if(USE_CUDA)
424438
)
425439
endif()
426440

441+
# Compile definition plus header/library search paths for the ILU backend.
# All paths are taken from environment variables read at configure time.
if(USE_ILU)
  add_compile_definitions(USE_ILU)
  set(CMAKE_VERBOSE_MAKEFILE ON)

  # An unset variable would silently turn "$ENV{X}/include" into "/include".
  # Warn at configure time so a misconfigured environment is visible.
  foreach(ilu_env_var IN ITEMS
      PYTHON_INCLUDE_PATH
      PYTHON_LIB_PATH
      PYTORCH_INSTALL_PATH
      IXFORMER_INSTALL_PATH)
    if("$ENV{${ilu_env_var}}" STREQUAL "")
      message(WARNING
        "USE_ILU is ON but environment variable ${ilu_env_var} is empty; "
        "ILU include/link directories derived from it will be wrong.")
    endif()
  endforeach()

  include_directories(
    $ENV{PYTHON_INCLUDE_PATH}
    $ENV{PYTORCH_INSTALL_PATH}/include
    $ENV{PYTORCH_INSTALL_PATH}/include/torch/csrc/api/include
    $ENV{IXFORMER_INSTALL_PATH}/csrc/include/ixformer
  )

  link_directories(
    $ENV{PYTHON_LIB_PATH}
    $ENV{PYTORCH_INSTALL_PATH}/lib
    $ENV{IXFORMER_INSTALL_PATH}
  )
endif()
457+
427458
# check if USE_CXX11_ABI is set correctly
428459
# if (DEFINED USE_CXX11_ABI)
429460
# parse_make_options(${TORCH_CXX_FLAGS} "TORCH_CXX_FLAGS")

setup.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,11 @@ def get_torch_root_path():
106106
except ImportError:
107107
return None
108108

109-
def _get_package_root_path(module_name):
    """Return the directory that contains the importable package `module_name`.

    Returns None when the package cannot be imported or exposes no `__file__`.
    """
    import importlib
    import os
    try:
        module = importlib.import_module(module_name)
    except ImportError:
        return None
    module_file = getattr(module, "__file__", None)
    if not module_file:
        # Nothing on disk to derive a directory from.
        return None
    return os.path.dirname(os.path.abspath(module_file))


def get_torch_mlu_root_path():
    """Return the install directory of `torch_mlu`, or None if it is unavailable.

    NOTE(review): the visible diff replaces this function in place with
    get_ixformer_root_path(); it is kept so any remaining callers keep working.
    Confirm there are none before deleting it.
    """
    return _get_package_root_path("torch_mlu")


def get_ixformer_root_path():
    """Return the install directory of `ixformer`, or None if it is unavailable."""
    return _get_package_root_path("ixformer")
116116

@@ -226,7 +226,15 @@ def set_cuda_envs():
226226
os.environ["CUDA_TOOLKIT_ROOT_DIR"] = "/usr/local/cuda"
227227
os.environ["NCCL_ROOT"] = get_nccl_root_path()
228228
os.environ["NCCL_VERSION"] = "2"
229-
229+
230+
def set_ilu_envs():
    """Export the environment variables used for an ILU build.

    Sets PYTHON_INCLUDE_PATH, PYTHON_LIB_PATH, LIBTORCH_ROOT,
    PYTORCH_INSTALL_PATH, CUDA_TOOLKIT_ROOT_DIR and IXFORMER_INSTALL_PATH in
    os.environ.

    Raises:
        RuntimeError: if any path helper returned None or an empty value
            (for example because torch or ixformer is not importable).
            Assigning None to os.environ would otherwise fail with an
            opaque TypeError.
    """
    torch_root = get_torch_root_path()
    resolved = {
        "PYTHON_INCLUDE_PATH": get_python_include_path(),
        # NOTE(review): PYTHON_LIB_PATH is pointed at the torch root, as in the
        # original code; confirm this is intended.
        "PYTHON_LIB_PATH": torch_root,
        "LIBTORCH_ROOT": torch_root,
        "PYTORCH_INSTALL_PATH": torch_root,
        "IXFORMER_INSTALL_PATH": get_ixformer_root_path(),
    }

    unresolved = [name for name, value in resolved.items() if not value]
    if unresolved:
        raise RuntimeError(
            "Cannot configure ILU build; could not resolve: "
            + ", ".join(unresolved)
            + ". Make sure torch and ixformer are installed."
        )

    os.environ.update(resolved)
    os.environ["CUDA_TOOLKIT_ROOT_DIR"] = "/usr/local/corex"
237+
230238
class CMakeExtension(Extension):
231239
def __init__(self, name: str, path: str, sourcedir: str = "") -> None:
232240
super().__init__(name, sources=[])
@@ -275,8 +283,7 @@ def run(self):
275283
for ext in self.extensions:
276284
self.build_extension(ext)
277285
except Exception as e:
278-
print("ERROR: Build failed.")
279-
print(f"Details: {e}")
286+
print("Build failed.")
280287
exit(1)
281288

282289
def build_extension(self, ext: CMakeExtension):
@@ -308,7 +315,7 @@ def build_extension(self, ext: CMakeExtension):
308315
f"-DDEVICE_ARCH={self.arch.upper()}",
309316
f"-DINSTALL_XLLM_KERNELS={'ON' if self.install_xllm_kernels else 'OFF'}",
310317
]
311-
318+
312319
if self.device == "a2" or self.device == "a3":
313320
cmake_args += ["-DUSE_NPU=ON"]
314321
# set npu environment variables
@@ -323,6 +330,9 @@ def build_extension(self, ext: CMakeExtension):
323330
f"-DCMAKE_CUDA_ARCHITECTURES={cuda_architectures}"]
324331
# set cuda environment variables
325332
set_cuda_envs()
333+
elif self.device == "ilu":
334+
cmake_args += ["-DUSE_ILU=ON"]
335+
set_ilu_envs()
326336
else:
327337
raise ValueError("Please set --device to a2 or a3 or mlu or cuda.")
328338

@@ -340,6 +350,7 @@ def build_extension(self, ext: CMakeExtension):
340350

341351
build_args = ["--config", build_type]
342352
max_jobs = os.getenv("MAX_JOBS", str(os.cpu_count()))
353+
# max_jobs="2"
343354
build_args += ["-j" + max_jobs]
344355

345356
env = os.environ.copy()
@@ -553,7 +564,7 @@ def pre_build():
553564
exit(0)
554565

555566
if __name__ == "__main__":
556-
device = 'a2' # default
567+
device = 'ilu' # default
557568
arch = get_cpu_arch()
558569
install_kernels = True
559570
if '--device' in sys.argv:

third_party/CMakeLists.txt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,32 @@ target_include_directories(mooncake_store PUBLIC
1919
${CMAKE_CURRENT_SOURCE_DIR}/Mooncake/mooncake-transfer-engine/include
2020
)
2121

22+
# Per-target C++ standard overrides for third-party targets when building
# with the ILU backend. Each stanza is skipped if its target does not exist.
if(USE_ILU)
  if(TARGET cpprest)
    set_target_properties(cpprest PROPERTIES
      CXX_STANDARD 20
      CXX_STANDARD_REQUIRED ON
      CXX_EXTENSIONS OFF
    )
  endif()

  if(TARGET transfer_engine)
    # Request strict C++20 through target properties only. Adding a raw
    # -std=c++20 compile option next to CXX_STANDARD puts two -std flags on
    # the command line.
    set_target_properties(transfer_engine PROPERTIES
      CXX_STANDARD 20
      CXX_STANDARD_REQUIRED ON
      CXX_EXTENSIONS OFF
    )
    message(STATUS "Set C++20 for transfer_engine target")
  endif()

  if(TARGET SMHasherSupport)
    set_target_properties(SMHasherSupport PROPERTIES
      CXX_STANDARD 11
      CXX_STANDARD_REQUIRED ON
      CXX_EXTENSIONS OFF
    )
    message(STATUS "SMHasherSupport target found and configured")
  else()
    message(WARNING "SMHasherSupport target not found after adding smhasher")
  endif()
endif()
49+
2250
target_link_libraries(mooncake_store PUBLIC transfer_engine cachelib_memory_allocator)

third_party/brpc

Submodule brpc updated from 67d6372 to 415f537

third_party/custom_patch/Mooncake.patch

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,18 @@
11
diff --git a/CMakeLists.txt b/CMakeLists.txt
2-
index 047ae3e..b4ae201 100644
2+
index 047ae3e..634b1bf 100644
33
--- a/CMakeLists.txt
44
+++ b/CMakeLists.txt
5-
@@ -16,7 +16,7 @@ option(WITH_STORE "build mooncake store library and sample code" ON)
5+
@@ -1,6 +1,8 @@
6+
cmake_minimum_required(VERSION 3.16)
7+
project(mooncake CXX C)
8+
-
9+
+set(CMAKE_CXX_STANDARD 20)
10+
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
11+
+set(CMAKE_CXX_EXTENSIONS OFF)
12+
# indicates cmake is invoked from top-level dir
13+
set(GLOBAL_CONFIG "true")
14+
15+
@@ -16,7 +18,7 @@ option(WITH_STORE "build mooncake store library and sample code" ON)
616
option(WITH_P2P_STORE "build p2p store library and sample code" OFF)
717
option(WITH_RUST_EXAMPLE "build the Rust interface and sample code for the transfer engine" OFF)
818

third_party/hccl_transfer

Submodule hccl_transfer updated from 1a7d59f to 4fd9603

third_party/minja

Submodule minja updated from cf9734a to 1d2b82b

third_party/xllm_ops

Submodule xllm_ops updated from 797a0cb to a8e8b15

xllm/core/distributed_runtime/worker_server.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ void WorkerServer::create_server(
104104

105105
CollectiveCommunicator comm(worker_global_rank, world_size, dp_size, ep_size);
106106
const ParallelArgs* parallel_args = comm.parallel_args();
107-
#if defined(USE_MLU) || defined(USE_CUDA)
107+
#if defined(USE_MLU) || defined(USE_CUDA) || defined(USE_ILU)
108108
comm.create_process_groups(master_node_addr, device);
109109
#endif
110110

xllm/core/framework/batch/batch_input_builder.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ void BatchInputBuilder::process_sequences_multithreaded() {
196196
state_.q_seq_lens.insert(state_.q_seq_lens.end(),
197197
state.q_seq_lens.begin(),
198198
state.q_seq_lens.end());
199-
#elif defined(USE_MLU) || defined(USE_CUDA)
199+
#elif defined(USE_MLU) || defined(USE_CUDA) || defined(USE_ILU)
200200
int32_t seq_len_offset = state_.seq_lens.back();
201201
// skip the first element which is 0
202202
for (size_t i = 1; i < state.seq_lens.size(); ++i) {
@@ -281,7 +281,7 @@ void BatchInputBuilder::process_single_sequence(
281281
#if defined(USE_NPU)
282282
state.seq_lens.push_back(seq_len);
283283
state.q_seq_lens.push_back(q_seq_len);
284-
#elif defined(USE_MLU) || defined(USE_CUDA)
284+
#elif defined(USE_MLU) || defined(USE_CUDA) || defined(USE_ILU)
285285
state.seq_lens.push_back(state.seq_lens.back() + seq_len);
286286
state.q_seq_lens.push_back(state.q_seq_lens.back() + q_seq_len);
287287
#endif
@@ -510,7 +510,7 @@ void BatchInputBuilder::padding_decode_batch_size(
510510
#if defined(USE_NPU)
511511
state_.seq_lens.push_back(num_decoding_tokens);
512512
state_.q_seq_lens.push_back(num_decoding_tokens);
513-
#elif defined(USE_MLU) || defined(USE_CUDA)
513+
#elif defined(USE_MLU) || defined(USE_CUDA) || defined(USE_ILU)
514514
state_.seq_lens.push_back(state_.seq_lens.back() + num_decoding_tokens);
515515
state_.q_seq_lens.push_back(state_.q_seq_lens.back() +
516516
num_decoding_tokens);

0 commit comments

Comments
 (0)