Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions QEfficient/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,68 @@
import torch.nn as nn
import torchvision.transforms as T
from torchvision.transforms.functional import InterpolationMode
from transformers import (
AutoModelForCausalLM,
AutoModelForImageTextToText,
)


def load_vlm_model(config):
    """Load a pretrained vision-language model described by *config*.

    Tries the image-text-to-text auto class first; if transformers rejects the
    architecture with a ValueError, falls back to the causal-LM auto class with
    ``trust_remote_code=True``. Returns the model in eval mode.
    """
    # NOTE(review): relies on the private ``_name_or_path`` attribute that
    # transformers stores on configs — confirm it is always populated here.
    checkpoint = config._name_or_path
    try:
        model = AutoModelForImageTextToText.from_pretrained(
            checkpoint,
            low_cpu_mem_usage=False,
            config=config,
        )
    except ValueError:
        # Not an image-text-to-text architecture; load it as a plain causal LM.
        model = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            low_cpu_mem_usage=False,
            trust_remote_code=True,
            config=config,
        )
    model.eval()
    return model


def load_vlm_model_from_config(config):
    """Instantiate a randomly-initialized VLM from *config* (no weights).

    Prefers the image-text-to-text auto class, falling back to the causal-LM
    auto class on ValueError. Half-precision configs are promoted to float32.
    Returns the model in eval mode.
    """
    shared_kwargs = {"attn_implementation": "eager", "trust_remote_code": True}
    try:
        model = AutoModelForImageTextToText.from_config(config, **shared_kwargs)
    except ValueError:
        # Architecture not supported by the image-text-to-text mapping.
        model = AutoModelForCausalLM.from_config(config, **shared_kwargs)
    # bf16/fp16 checkpoints are cast up to fp32 (e.g. for CPU-side testing).
    declared_dtype = getattr(model.config, "torch_dtype", None)
    if declared_dtype in (torch.bfloat16, torch.float16):
        model = model.to(torch.float32)
    model.eval()
    return model


def set_num_layers_vlm(config, n_layer=1):
    """Shrink a (multimodal) model config to *n_layer* hidden layers.

    Handles the config layouts seen in practice: mllama-style configs (text
    tower plus cross-attention layer indices), configs with a ``text_config``
    or ``llm_config`` sub-config alongside ``vision_config``, and plain
    single-tower configs. Mutates *config* in place and returns it.
    """
    # n_layer == -1 is the sentinel for "keep the model at full depth".
    if n_layer == -1:
        return config

    if hasattr(config, "model_type") and "mllama" in config.model_type:
        # Mllama: trim the text tower and drop any cross-attention layer
        # whose index would fall past the new depth.
        text_cfg = config.text_config
        text_cfg.num_hidden_layers = n_layer
        text_cfg.cross_attention_layers = [
            idx for idx in text_cfg.cross_attention_layers if idx < n_layer
        ]
        return config

    # Two-tower configs: trim both the language and vision sub-configs.
    for language_attr in ("text_config", "llm_config"):
        if hasattr(config, language_attr):
            getattr(config, language_attr).num_hidden_layers = n_layer
            config.vision_config.num_hidden_layers = n_layer
            return config

    # Plain single-tower config.
    config.num_hidden_layers = n_layer
    return config


# Processor class for InternVL models
Expand Down Expand Up @@ -169,6 +231,36 @@ class ModelConfig:
"TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ",
}

# Vision-language checkpoints exercised via the standard HF auto-class path.
STANDARD_VLM_MODELS = {
"llava-hf/llava-1.5-7b-hf",
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
"google/gemma-3-4b-it",
"mistralai/Mistral-Small-3.1-24B-Instruct-2503",
"Qwen/Qwen2.5-VL-3B-Instruct",
"meta-llama/Llama-3.2-11B-Vision-Instruct",
}

# InternVL-family checkpoints (presumably need the custom InternVL processor
# referenced elsewhere in this module — confirm against its usage).
INTERNVL_MODELS = {
"OpenGVLab/InternVL2_5-1B",
"OpenGVLab/InternVL3_5-1B",
}

# Molmo-family checkpoints.
MOLMO_MODELS = {
"allenai/Molmo-7B-D-0924",
}

# Models excluded from the test run; reasons are not stated here — TODO
# document why each entry is skipped.
SKIPPED_MODELS = {
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
"allenai/Molmo-7B-D-0924",
"meta-llama/Llama-3.2-11B-Vision-Instruct",
}

# Models run in the dual-QPC (separate vision/language program) configuration.
DUAL_QPC_MODELS = {
"OpenGVLab/InternVL2_5-1B",
"OpenGVLab/InternVL3_5-1B",
"Qwen/Qwen2.5-VL-3B-Instruct",
}

EXTERNAL_MODELS = {
"hpcai-tech/grok-1": {
"pytorch_hf_tokens_custom_case": [
Expand Down
7 changes: 4 additions & 3 deletions scripts/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ pipeline {
mkdir -p $PWD/Non_cli_qaic_multimodal &&
export TOKENIZERS_PARALLELISM=false &&
export QEFF_HOME=$PWD/Non_cli_qaic_multimodal &&
pytest tests -m '(not cli) and (on_qaic) and (multimodal) and (not qnn) and (not finetune) and (not diffusion_models)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log6.xml --durations=10 &&
pytest tests -m '(not cli) and (on_qaic) and (multimodal) and (not qnn) and (not finetune) and (not diffusion_models) and (not nightly)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log6.xml --durations=10 &&

junitparser merge tests/tests_log6.xml tests/tests_log.xml &&
deactivate"
'''
Expand Down Expand Up @@ -203,9 +204,9 @@ pipeline {
cd /efficient-transformers &&
. preflight_qeff/bin/activate &&
# TODO: Update torch_qaic path to py312 when migrating to Python 3.12
pip install /opt/qti-aic/integrations/torch_qaic/py312/torch_qaic-0.1.0-cp312-cp312-linux_x86_64.whl &&
pip install /opt/qti-aic/integrations/torch_qaic/py312/torch_qaic-0.1.0-cp312-cp312-manylinux_2_34_x86_64.whl &&
# pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
pip install torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0 --index-url https://download.pytorch.org/whl/cpu &&
pip install torch==2.9.1 torchvision==0.24.1 torchaudio==2.9.1 --index-url https://download.pytorch.org/whl/cpu &&
mkdir -p $PWD/cli_qaic_finetuning &&
export TOKENIZERS_PARALLELISM=false &&
export QEFF_HOME=$PWD/cli_qaic_finetuning &&
Expand Down
52 changes: 51 additions & 1 deletion tests/configs/causal_model_configs.json
Original file line number Diff line number Diff line change
Expand Up @@ -487,5 +487,55 @@
}
}
}
],
"disaggregated_causal_lm_models": [
{
"model_name": "openai/gpt-oss-120b",
"model_type": "gpt_oss",
"additional_params": {
"num_hidden_layers": 2,
"hidden_size": 64,
"intermediate_size": 256,
"num_attention_heads": 2,
"num_key_value_heads": 1,
"num_local_experts": 4
}
}
],
"disaggregated_dummy_models": [
{
"model_name": "openai/gpt-oss-20b",
"model_type": "gpt_oss",
"tokenizer_id": "gpt2",
"additional_params": {
"num_hidden_layers": 2,
"hidden_size": 64,
"intermediate_size": 256,
"num_attention_heads": 2,
"num_key_value_heads": 1,
"num_local_experts": 4,
"head_dim": 32,
"max_position_embeddings": 512,
"vocab_size": 201088,
"sliding_window": 128
}
},
{
"model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
"model_type": "qwen3_moe",
"additional_params": {
"hidden_size": 256,
"intermediate_size": 256,
"max_position_embeddings": 512,
"max_window_layers": 48,
"moe_intermediate_size": 768,
"num_attention_heads": 2,
"num_experts": 4,
"num_experts_per_tok": 2,
"num_hidden_layers": 2,
"num_key_value_heads": 1,
"vocab_size": 151936
}
}
]
}
}
Loading