diff --git a/quant.h b/quant.h index 39a3a75..2070249 100644 --- a/quant.h +++ b/quant.h @@ -9982,7 +9982,7 @@ static tq_model_t* tq_load_safetensors(const char* path) { free(tensors); - /* Qwen3.5 RMSNorm adjustment: Qwen3_5RMSNorm computes + /* Qwen RMSNorm adjustment: Qwen's RMSNorm computes * output = norm(x) * (1.0 + weight), NOT norm(x) * weight. * We bake the "+1" into the weight so tq_rmsnorm can stay as * out = x * rsqrt * weight. @@ -9992,8 +9992,14 @@ static tq_model_t* tq_load_safetensors(const char* path) { * It does NOT apply to: linear_attn.norm (Qwen3_5RMSNormGated * uses plain weight without +1). * - * We detect Qwen3.5 by the presence of DeltaNet layers. */ - if (model->config.delta_n_heads > 0) { + * Applies to all Qwen-family models (qwen2, qwen3, qwen3_5, etc.) + * Detected by arch string or DeltaNet presence. */ + int is_qwen_family = (model->config.delta_n_heads > 0); + if (model->gguf_ctx) { + const tq_gguf_ctx_t* gctx = (const tq_gguf_ctx_t*)model->gguf_ctx; + if (strstr(gctx->arch, "qwen") != NULL) is_qwen_family = 1; + } + if (is_qwen_family) { int dim_h = model->config.hidden_dim; int head_dim_h = model->config.head_dim; @@ -10022,7 +10028,7 @@ static tq_model_t* tq_load_safetensors(const char* path) { for (int i = 0; i < dim_h; i++) model->output_norm[i] += 1.0f; } - fprintf(stderr, "tq_load_model: applied Qwen3.5 RMSNorm +1 weight adjustment\n"); + fprintf(stderr, "tq_load_model: applied Qwen RMSNorm +1 weight adjustment\n"); } /* Gemma3 RMSNorm adjustment: same (1+w) scaling as Qwen3.5 */ diff --git a/src/engine/tq_model.c b/src/engine/tq_model.c index d2b094f..e0a0113 100644 --- a/src/engine/tq_model.c +++ b/src/engine/tq_model.c @@ -1517,7 +1517,7 @@ static tq_model_t* tq_load_safetensors(const char* path) { free(tensors); - /* Qwen3.5 RMSNorm adjustment: Qwen3_5RMSNorm computes + /* Qwen RMSNorm adjustment: Qwen's RMSNorm computes * output = norm(x) * (1.0 + weight), NOT norm(x) * weight. * We bake the "+1" into the weight so tq_rmsnorm can stay as * out = x * rsqrt * weight. @@ -1527,8 +1527,14 @@ static tq_model_t* tq_load_safetensors(const char* path) { * It does NOT apply to: linear_attn.norm (Qwen3_5RMSNormGated * uses plain weight without +1). * - * We detect Qwen3.5 by the presence of DeltaNet layers. */ - if (model->config.delta_n_heads > 0) { + * Applies to all Qwen-family models (qwen2, qwen3, qwen3_5, etc.) + * Detected by arch string or DeltaNet presence. */ + int is_qwen_family = (model->config.delta_n_heads > 0); + if (model->gguf_ctx) { + const tq_gguf_ctx_t* gctx = (const tq_gguf_ctx_t*)model->gguf_ctx; + if (strstr(gctx->arch, "qwen") != NULL) is_qwen_family = 1; + } + if (is_qwen_family) { int dim_h = model->config.hidden_dim; int head_dim_h = model->config.head_dim; @@ -1557,7 +1563,7 @@ static tq_model_t* tq_load_safetensors(const char* path) { for (int i = 0; i < dim_h; i++) model->output_norm[i] += 1.0f; } - fprintf(stderr, "tq_load_model: applied Qwen3.5 RMSNorm +1 weight adjustment\n"); + fprintf(stderr, "tq_load_model: applied Qwen RMSNorm +1 weight adjustment\n"); } /* Gemma3 RMSNorm adjustment: same (1+w) scaling as Qwen3.5 */ diff --git a/wasm/quant.wasm b/wasm/quant.wasm index 7c3b273..850de4b 100755 Binary files a/wasm/quant.wasm and b/wasm/quant.wasm differ