|
1 | 1 | #include "models.h" |
2 | 2 |
|
3 | | - |
4 | | - |
5 | 3 | llm_build_bert::llm_build_bert(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) { |
6 | 4 | const int64_t n_embd_head = hparams.n_embd_head_v; |
7 | 5 | const int64_t n_embd_gqa = hparams.n_embd_v_gqa(); |
@@ -129,9 +127,17 @@ llm_build_bert::llm_build_bert(const llama_model & model, const llm_graph_params |
129 | 127 | // feed-forward network |
130 | 128 | if (hparams.moe_every_n_layers > 0 && il % hparams.moe_every_n_layers == 1) { |
131 | 129 | // MoE branch |
132 | | - cur = build_moe_ffn(cur, model.layers[il].ffn_gate_inp, model.layers[il].ffn_up_exps, nullptr, |
133 | | - model.layers[il].ffn_down_exps, nullptr, hparams.n_expert, hparams.n_expert_used, |
134 | | - LLM_FFN_GELU, false, false, 0.0f, LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, il); |
| 130 | + cur = build_moe_ffn(cur, |
| 131 | + model.layers[il].ffn_gate_inp, |
| 132 | + model.layers[il].ffn_up_exps, |
| 133 | + nullptr, |
| 134 | + model.layers[il].ffn_down_exps, |
| 135 | + nullptr, |
| 136 | + hparams.n_expert, hparams.n_expert_used, |
| 137 | + LLM_FFN_GELU, false, |
| 138 | + hparams.expert_weights_scale, |
| 139 | + LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, |
| 140 | + il); |
135 | 141 | cb(cur, "ffn_moe_out", il); |
136 | 142 | } else if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_NOMIC_BERT_MOE || |
137 | 143 | model.arch == LLM_ARCH_JINA_BERT_V3) { |
|
0 commit comments