@@ -4031,7 +4031,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
                 # split Conv3D into Conv2Ds
                 c1, c2, kt, kh, kw = data_torch.shape
                 del c1, c2, kh, kw # unused
-                assert kt == 2, "Current implmentation only support temporal_patch_size of 2"
+                assert kt == 2, "Current implementation only supports temporal_patch_size of 2"
                 yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight" , data_torch[:, :, 0, ...])
                 yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight.1", data_torch[:, :, 1, ...])
             else:
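
The split above works because, with temporal_patch_size of 2 and no temporal padding, a Conv3D over a 2-frame input is exactly the sum of two Conv2Ds, one per temporal slice of the kernel. A minimal sketch of that equivalence (illustrative only, not part of this diff; shapes are arbitrary):

```python
# Check that splitting a kt=2 Conv3D kernel into two Conv2D kernels is exact.
import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 2, 8, 8)   # (N, C_in, T=2, H, W)
w = torch.randn(16, 3, 2, 3, 3)  # (C_out, C_in, kt=2, kh, kw)

out3d = F.conv3d(x, w)           # -> (1, 16, 1, 6, 6)
out2d = F.conv2d(x[:, :, 0], w[:, :, 0]) + F.conv2d(x[:, :, 1], w[:, :, 1])

assert torch.allclose(out3d[:, :, 0], out2d, atol=1e-5)
```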
@@ -4842,12 +4842,12 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         yield from super().modify_tensors(data_torch, name, bid)


-@ModelBase.register("Qwen3_5ForConditionalGeneration")
+@ModelBase.register("Qwen3_5ForConditionalGeneration", "Qwen3_5ForCausalLM")
 class Qwen3_5TextModel(_LinearAttentionVReorderBase):
     model_arch = gguf.MODEL_ARCH.QWEN35


-@ModelBase.register("Qwen3_5MoeForConditionalGeneration")
+@ModelBase.register("Qwen3_5MoeForConditionalGeneration", "Qwen3_5MoeForCausalLM")
 class Qwen3_5MoeTextModel(_LinearAttentionVReorderBase):
     model_arch = gguf.MODEL_ARCH.QWEN35MOE

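For context, @ModelBase.register maps architecture strings (as declared in a checkpoint's config.json "architectures" field) to the converter class, so adding the *ForCausalLM names lets the same classes pick up checkpoints that declare either architecture. A toy sketch of that registry pattern (not the actual ModelBase implementation):

```python
# Minimal registry: each architecture name resolves to its converter class.
_registry: dict[str, type] = {}

def register(*names: str):
    def wrap(cls: type) -> type:
        for name in names:
            _registry[name] = cls
        return cls
    return wrap

@register("Qwen3_5ForConditionalGeneration", "Qwen3_5ForCausalLM")
class Qwen3_5TextModel:
    pass

assert _registry["Qwen3_5ForCausalLM"] is Qwen3_5TextModel
```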
@@ -5404,7 +5404,7 @@ def set_gguf_parameters(self):
         # Get ssm_d_conv from linear_attn_config.short_conv_kernel_size or ssm_d_conv
         linear_attn_config = self.hparams["linear_attn_config"]
         # n_head == 0 for KDA layers, n_head > 0 for MLA layers
-        # full_attention_layers list will be used to distingush layer type
+        # full_attention_layers list will be used to distinguish layer type
         _num_kv_heads = list()
         _full_attn_layers = linear_attn_config["full_attn_layers"]
         for il in range(self.hparams["num_hidden_layers"]):
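
The loop body falls outside this hunk, but the comment implies the list encodes layer type by KV-head count: full-attention (MLA) layers keep their heads, KDA layers get 0. A hypothetical sketch of that encoding (names and semantics assumed, not taken from the source):

```python
def kv_heads_per_layer(n_layers: int, full_attn_layers: list[int], n_kv_heads: int) -> list[int]:
    # Layers listed in full_attn_layers are full-attention (MLA) and keep
    # their KV head count; the rest are KDA (linear attention) and get 0.
    return [n_kv_heads if il in full_attn_layers else 0 for il in range(n_layers)]

# e.g. a 6-layer model with full attention on layers 2 and 5:
assert kv_heads_per_layer(6, [2, 5], 8) == [0, 0, 8, 0, 0, 8]
```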
@@ -6505,7 +6505,7 @@ def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
         self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.GEMMA3)
-        # default values below are taken from HF tranformers code
+        # default values below are taken from HF transformers code
         self.gguf_writer.add_vision_attention_layernorm_eps(hparams.get("layer_norm_eps", 1e-6))
         self.gguf_writer.add_vision_use_gelu(True)
         # calculate proj_scale_factor (used by tinygemma3 test model)
@@ -7097,7 +7097,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter

         if bid == 0 and "time_mix_a" in new_name:
             # dummy v0/v1/v2 on first layer
-            # easist way to make llama happy
+            # easiest way to make llama happy
             yield (new_name.replace("time_mix_a", "time_mix_v"), data_torch)

         yield (new_name, data_torch)
@@ -9596,7 +9596,7 @@ def __init__(self, *args, **kwargs):
         # NOTE: Explicitly include hparam prefix for d_model to
         # disambiguate with top-level head_dim
         # NOTE 2: If needed for future models, this can be isolated in a method
-        # to separate the prefix setting and teh keys used
+        # to separate the prefix setting and the keys used
         self.d_model = self.find_hparam([f"{self.hparam_prefixes[0]}_head_dim", "hidden_size", "d_model"])
         self.n_group = self.find_hparam(["n_groups", "num_groups"])
         self.d_inner = self.find_hparam(["expand", "num_heads"]) * self.d_model
@@ -9743,7 +9743,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_value_length(self.head_dim)

         # Set feed_forward_length
-        # NOTE: This will trigger an override warning. This is preferrable to
+        # NOTE: This will trigger an override warning. This is preferable to
         # duplicating all the parent logic
         if not self.is_moe:
             n_ff = self.find_hparam(["intermediate_size", "n_inner", "hidden_dim"])
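
find_hparam, used in several of these hunks, is a first-match lookup across candidate key names, which is how one converter handles configs that call the same quantity "intermediate_size", "n_inner", or "hidden_dim". A toy version of that pattern (the real helper lives on ModelBase and takes more options; this is just the fallback idea):

```python
def find_hparam(hparams: dict, keys: list[str]):
    # Return the value for the first candidate key present in the config.
    for k in keys:
        if k in hparams:
            return hparams[k]
    raise KeyError(f"none of {keys} found in hparams")

assert find_hparam({"n_inner": 4096}, ["intermediate_size", "n_inner", "hidden_dim"]) == 4096
```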