File tree Expand file tree Collapse file tree 1 file changed +6
-2
lines changed
vllm/model_executor/layers/fused_moe Expand file tree Collapse file tree 1 file changed +6
-2
lines changed Original file line number Diff line number Diff line change @@ -1200,10 +1200,14 @@ def weight_loader(
12001200 if full_load :
12011201 shard_dim += 1
12021202
1203- # Materialize GGUF UninitializedParameter
1203+ # Materialize GGUF UninitializedParameter, accounting for merged weights
12041204 if is_gguf_weight and isinstance (param , UninitializedParameter ):
1205+ # Materializing the tensor requires its full shape, including the
1206+ # number of experts, so this path is only valid under `full_load`.
1207+ assert full_load
12051208 final_shape = list (loaded_weight .shape )
1206- if shard_id in ["w1" , "w3" ]:
1209+ # w1 and w3 are merged per expert.
1210+ if shard_id in {"w1" , "w3" }:
12071211 final_shape [1 ] *= 2
12081212 final_shape [shard_dim ] = final_shape [shard_dim ] // self .tp_size
12091213 param .materialize (final_shape , dtype = loaded_weight .dtype )
You can’t perform that action at this time.
0 commit comments