
Commit d7ec230

DongheJin authored and yq33victor committed
feat: add support for the GLM-4.5-Air model. (#370)
1 parent 76ff175 commit d7ec230

4 files changed (+3, -8 lines)


xllm/core/framework/model/model_args.h

Lines changed: 0 additions & 1 deletion
@@ -54,7 +54,6 @@ struct ModelArgs {
   PROPERTY(int64_t, vocab_size) = -1;
 
   PROPERTY(bool, use_qk_norm) = false;
-
   PROPERTY(float, rms_norm_eps) = 0.0f;
 
   PROPERTY(float, layer_norm_eps) = 0.0f;
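
For context, PROPERTY(bool, use_qk_norm) is the model argument the rest of this change wires up: the two REGISTER_MODEL_ARGS files below start reading it from the checkpoint config. A minimal sketch of what such a declaration-plus-accessor macro could expand to, assuming a simple getter/setter pattern (xllm's actual PROPERTY macro may differ; the names here are illustrative only):

// Hypothetical sketch of a PROPERTY-style macro: a getter, a setter, and a
// member whose in-class default comes from the `= value;` after the invocation.
#define PROPERTY(Type, name)                          \
  const Type& name() const { return name##_; }        \
  void name(const Type& value) { name##_ = value; }   \
  Type name##_

struct ModelArgsSketch {
  PROPERTY(bool, use_qk_norm) = false;   // expands to accessors + `bool use_qk_norm_ = false;`
  PROPERTY(float, rms_norm_eps) = 0.0f;
};

// Usage: args.use_qk_norm(true); bool enabled = args.use_qk_norm();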

xllm/core/layers/npu/npu_glm4_moe_decoder_layer.cpp

Lines changed: 1 addition & 7 deletions
@@ -109,7 +109,7 @@ enum DecoderLayerTensorId : int {
   K_NORM_WEIGHT = 69
 };
 
-static uint64_t WEIGHT_COUNT_PER_LAYER = 70;
+static uint64_t WEIGHT_COUNT_PER_LAYER = 68;
 
 static std::unordered_map<std::string, int> WEIGHT_MAPPING = {
     {"input_layernorm.weight", IN_INPUT_NORM_WEIGHT},
@@ -125,9 +125,6 @@ static std::unordered_map<std::string, int> WEIGHT_MAPPING = {
 
     {"self_attn.o_proj.weight", IN_QKV_DENSE_WEIGHT},
 
-    {"self_attn.q_norm.weight", Q_NORM_WEIGHT},
-    {"self_attn.k_norm.weight", K_NORM_WEIGHT},
-
     {"post_attention_layernorm.weight", IN_POST_ATTN_NORM_WEIGHT},
 
     // mlp or shared expert
@@ -180,9 +177,6 @@ static std::unordered_map<std::string, int> WEIGHT_MAPPING_W8A8 = {
     {"self_attn.o_proj.weight_offset", IN_QKV_DENSE_OFFSET},
     {"self_attn.o_proj.weight_scale", IN_QKV_DENSE_SCALE},
 
-    {"self_attn.q_norm.weight", Q_NORM_WEIGHT},
-    {"self_attn.k_norm.weight", K_NORM_WEIGHT},
-
     {"post_attention_layernorm.weight", IN_POST_ATTN_NORM_WEIGHT},
     {"post_attention_layernorm.bias", IN_POST_ATTN_NORM_NEW_BIAS},
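
The hunks above only show the unconditional q_norm/k_norm entries being dropped and the fixed per-layer weight count shrinking from 70 to 68. A plausible way for the layer to keep serving both kinds of checkpoints is to extend the mapping and the count at runtime when use_qk_norm is enabled; the sketch below is an assumption about that approach, not this file's actual code (build_weight_layout, WeightLayout, and Q_NORM_WEIGHT = 68 are illustrative; only K_NORM_WEIGHT = 69 is visible in the diff):

// Hypothetical sketch: start from the 68 always-present weights and append the
// q/k norm entries only for checkpoints whose config enables QK normalization,
// so checkpoints without q_norm/k_norm tensors can load against the same layer.
#include <cstdint>
#include <string>
#include <unordered_map>
#include <utility>

enum QkNormTensorId : int { Q_NORM_WEIGHT = 68, K_NORM_WEIGHT = 69 };  // 68 is assumed

struct WeightLayout {
  std::unordered_map<std::string, int> mapping;
  uint64_t count_per_layer;
};

WeightLayout build_weight_layout(std::unordered_map<std::string, int> base_mapping,
                                 bool use_qk_norm) {
  WeightLayout layout{std::move(base_mapping), 68};  // weights common to both variants
  if (use_qk_norm) {
    layout.mapping.emplace("self_attn.q_norm.weight", Q_NORM_WEIGHT);
    layout.mapping.emplace("self_attn.k_norm.weight", K_NORM_WEIGHT);
    layout.count_per_layer += 2;  // back to 70 when q/k norm tensors are expected
  }
  return layout;
}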

xllm/models/llm/glm4_moe.h

Lines changed: 1 addition & 0 deletions
@@ -348,6 +348,7 @@ REGISTER_MODEL_ARGS(glm4_moe, [&] {
   LOAD_ARG_OR(num_experts_per_tok, "num_experts_per_tok", 8);
   LOAD_ARG_OR(n_layers, "num_hidden_layers", 48);
   LOAD_ARG_OR(n_kv_heads, "num_key_value_heads", 4);
+  LOAD_ARG_OR(use_qk_norm, "use_qk_norm", true);
   LOAD_ARG_OR(rms_norm_eps, "rms_norm_eps", 1e-6);
   LOAD_ARG_OR(rope_theta, "rope_theta", 1000000.0f);
   LOAD_ARG_OR(tie_word_embeddings, "tie_word_embeddings", false);
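
LOAD_ARG_OR presumably reads a key from the model's config when it is present and falls back to the given default otherwise, so configs that omit "use_qk_norm" keep the default of true while configs that set it explicitly override it. A minimal sketch of that read-or-default pattern, using nlohmann::json only to keep the example self-contained (the real macro is backed by xllm's own config machinery):

#include <string>
#include <nlohmann/json.hpp>

// Hypothetical helper mirroring what LOAD_ARG_OR is assumed to do: take the value
// from the checkpoint's config.json when the key exists, otherwise use the default.
template <typename T>
T load_arg_or(const nlohmann::json& config, const std::string& key, T default_value) {
  return config.contains(key) ? config.at(key).get<T>() : default_value;
}

// Usage sketch: an explicit "use_qk_norm" entry overrides the default of true.
//   nlohmann::json config = {{"num_key_value_heads", 4}, {"use_qk_norm", false}};
//   bool use_qk_norm = load_arg_or(config, "use_qk_norm", true);   // -> false (from config)
//   float eps = load_arg_or(config, "rms_norm_eps", 1e-6f);        // -> 1e-6f (default)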

xllm/models/llm/glm4_moe_mtp.h

Lines changed: 1 addition & 0 deletions
@@ -335,6 +335,7 @@ REGISTER_MODEL_ARGS(glm4_moe_mtp, [&] {
   LOAD_ARG_OR(num_experts_per_tok, "num_experts_per_tok", 8);
   LOAD_ARG_OR(n_layers, "num_hidden_layers", 48);
   LOAD_ARG_OR(n_kv_heads, "num_key_value_heads", 4);
+  LOAD_ARG_OR(use_qk_norm, "use_qk_norm", true);
   LOAD_ARG_OR(rms_norm_eps, "rms_norm_eps", 1e-6);
   LOAD_ARG_OR(rope_theta, "rope_theta", 1000000.0f);
   LOAD_ARG_OR(tie_word_embeddings, "tie_word_embeddings", false);
