tpu_inference/models/common: 1 file changed, +3 -2 lines

```diff
@@ -27,7 +27,8 @@
 # Architectures that prefer "vllm" implementation type when MODEL_IMPL_TYPE is "auto".
 # These architectures are listed here because they have better performance with the
 # vLLM PyTorch backend compared to the flax_nnx JAX backend for now.
-_VLLM_REQUIRED_ARCHITECTURES: frozenset[str] = frozenset({"GptOssForCausalLM"})
+_VLLM_PREFERRED_ARCHITECTURES: frozenset[str] = frozenset(
+    {"GptOssForCausalLM"})
 
 
 class UnsupportedArchitectureError(ValueError):
@@ -355,7 +356,7 @@ def get_model(
         f"Expected exactly one architecture, got {len(architectures)}: "
         f"{architectures}")
     arch = architectures[0]
-    impl = "vllm" if arch in _VLLM_REQUIRED_ARCHITECTURES else "flax_nnx"
+    impl = "vllm" if arch in _VLLM_PREFERRED_ARCHITECTURES else "flax_nnx"
     logger.info(f"Resolved MODEL_IMPL_TYPE 'auto' to '{impl}'")
 
     match impl:
```
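For context, a minimal, self-contained sketch of how the `MODEL_IMPL_TYPE="auto"` resolution behaves after this rename. The `resolve_impl` helper and the standalone layout are illustrative assumptions, not the module's actual structure; only the frozenset and the selection expression mirror the patch.

```python
# Sketch only: resolve_impl is a hypothetical helper; the real logic lives
# inside get_model() in tpu_inference/models/common.
_VLLM_PREFERRED_ARCHITECTURES: frozenset[str] = frozenset(
    {"GptOssForCausalLM"})


def resolve_impl(architectures: list[str]) -> str:
    """Pick a backend for MODEL_IMPL_TYPE='auto' from the model's architectures."""
    if len(architectures) != 1:
        raise ValueError(
            f"Expected exactly one architecture, got {len(architectures)}: "
            f"{architectures}")
    arch = architectures[0]
    # Architectures in the preferred set currently perform better on the vLLM
    # PyTorch backend; everything else falls back to the flax_nnx JAX backend.
    return "vllm" if arch in _VLLM_PREFERRED_ARCHITECTURES else "flax_nnx"


assert resolve_impl(["GptOssForCausalLM"]) == "vllm"
assert resolve_impl(["LlamaForCausalLM"]) == "flax_nnx"
```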