Skip to content

Commit b205fe6

Browse files
committed
feat: support glm4v for npu.
1 parent b8c4168 commit b205fe6

File tree

10 files changed

+1000
-43
lines changed

10 files changed

+1000
-43
lines changed

xllm/core/layers/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ cc_library(
6464
qwen3_vision_encode_layer.h
6565
qwen3_decoder_layer.h
6666
qwen3_moe_decoder_layer.h
67+
glm4_decoder_layer.h
6768
rms_norm.h
6869
siglip_encoder_layer.h
6970
pos_embedding.h
xllm/core/layers/glm4_decoder_layer.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/* Copyright 2025 The xLLM Authors. All Rights Reserved.
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
https://github.com/jd-opensource/xllm/blob/main/LICENSE
6+
Unless required by applicable law or agreed to in writing, software
7+
distributed under the License is distributed on an "AS IS" BASIS,
8+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
See the License for the specific language governing permissions and
10+
limitations under the License.
11+
==============================================================================*/
12+
#pragma once
13+
#include "npu/npu_glm4_decoder_layer_impl.h"
14+
namespace xllm {
15+
namespace layer {
16+
#if defined(USE_NPU)
17+
// Module-holder wrapper for the GLM4 decoder layer, selected when USE_NPU is
// defined. It derives from torch::nn::ModuleHolder<NpuGlm4DecoderLayerImpl>,
// so the holder forwards to a shared NpuGlm4DecoderLayerImpl instance.
class Glm4DecoderLayer
18+
: public torch::nn::ModuleHolder<NpuGlm4DecoderLayerImpl> {
19+
public:
20+
// Re-export the ModuleHolder constructors of the base class.
using torch::nn::ModuleHolder<NpuGlm4DecoderLayerImpl>::ModuleHolder;
21+
// Alias for the wrapped implementation type; the attribute suppresses
// unused-typedef warnings when nothing refers to Impl.
using Impl __attribute__((__unused__)) = NpuGlm4DecoderLayerImpl;
22+
// Builds a NpuGlm4DecoderLayerImpl from `context` via std::make_shared and
// hands the resulting shared pointer to ModuleHolder.
// NOTE(review): this single-argument constructor is not `explicit`, so an
// implicit conversion from ModelContext is permitted; confirm that is intended.
Glm4DecoderLayer(const ModelContext& context)
23+
: ModuleHolder(std::make_shared<NpuGlm4DecoderLayerImpl>(context)) {}
24+
};
25+
#else
26+
// Fallback definition of Glm4DecoderLayer used when USE_NPU is not defined.
// It wraps Qwen2DecoderImpl instead of the NPU-specific implementation.
// NOTE(review): the only include visible in this header is
// "npu/npu_glm4_decoder_layer_impl.h", and it sits outside the USE_NPU guard.
// Nothing visible here declares Qwen2DecoderImpl; confirm that non-NPU builds
// get that declaration (and can resolve the NPU header) from elsewhere.
class Glm4DecoderLayer : public torch::nn::ModuleHolder<Qwen2DecoderImpl> {
27+
public:
28+
// Re-export the ModuleHolder constructors of the base class.
using torch::nn::ModuleHolder<Qwen2DecoderImpl>::ModuleHolder;
29+
// Alias for the wrapped implementation type; the attribute suppresses
// unused-typedef warnings when nothing refers to Impl.
using Impl __attribute__((__unused__)) = Qwen2DecoderImpl;
30+
// Builds a Qwen2DecoderImpl from `context` via std::make_shared and hands the
// resulting shared pointer to ModuleHolder.
Glm4DecoderLayer(const ModelContext& context)
31+
: ModuleHolder(std::make_shared<Qwen2DecoderImpl>(context)) {}
32+
};
33+
#endif
34+
} // namespace layer
35+
} // namespace xllm

xllm/core/layers/npu/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ cc_library(
2424
npu_llama_decoder_layer_impl.h
2525
npu_qwen2_decoder_layer_impl.h
2626
npu_qwen3_decoder_layer_impl.h
27+
npu_glm4_decoder_layer_impl.h
2728
npu_rms_norm_impl.h
2829
npu_siglip_encoder_layer_impl.h
2930
SRCS
@@ -45,6 +46,7 @@ cc_library(
4546
npu_llama_decoder_layer_impl.cpp
4647
npu_qwen2_decoder_layer_impl.cpp
4748
npu_qwen3_decoder_layer_impl.cpp
49+
npu_glm4_decoder_layer_impl.cpp
4850
npu_rms_norm_impl.cpp
4951
npu_siglip_encoder_layer_impl.cpp
5052
DEPS

0 commit comments

Comments
 (0)