diff --git a/QEfficient/transformers/models/qwen2_5_vl/configs/Qwen2.5-VL-32B-Instruct-AWQ.yaml b/QEfficient/transformers/models/qwen2_5_vl/configs/Qwen2.5-VL-32B-Instruct-AWQ.yaml new file mode 100644 index 000000000..458d7e96a --- /dev/null +++ b/QEfficient/transformers/models/qwen2_5_vl/configs/Qwen2.5-VL-32B-Instruct-AWQ.yaml @@ -0,0 +1,3 @@ +FP16NodeInstanceNames: + - logits + - onnx::MatMul_25676 \ No newline at end of file diff --git a/QEfficient/transformers/models/qwen3_moe/configs/Qwen3-30B-A3B-Instruct-2507.yaml b/QEfficient/transformers/models/qwen3_moe/configs/Qwen3-30B-A3B-Instruct-2507.yaml new file mode 100644 index 000000000..466b774b5 --- /dev/null +++ b/QEfficient/transformers/models/qwen3_moe/configs/Qwen3-30B-A3B-Instruct-2507.yaml @@ -0,0 +1 @@ +FP16NodeInstanceNames: ['onnx::MatMul_40141', '/lm_head/MatMul_output_0'] diff --git a/QEfficient/transformers/models/whisper/configs/whisper-large-v3-turbo.yaml b/QEfficient/transformers/models/whisper/configs/whisper-large-v3-turbo.yaml new file mode 100644 index 000000000..7080f2f58 --- /dev/null +++ b/QEfficient/transformers/models/whisper/configs/whisper-large-v3-turbo.yaml @@ -0,0 +1 @@ +FP16NodeInstanceNames: ['onnx::MatMul_5673', 'logits'] diff --git a/examples/audio/configs/wav2vec2-base-960h.yaml b/examples/audio/configs/wav2vec2-base-960h.yaml new file mode 100644 index 000000000..6248f9515 --- /dev/null +++ b/examples/audio/configs/wav2vec2-base-960h.yaml @@ -0,0 +1,2 @@ +FP16NodeInstanceNames: ['onnx::MatMul_1328','/wav2vec2/feature_projection/projection/MatMul_output_0', 'onnx::MatMul_1401', '/wav2vec2/encoder/layers.5/attention/q_proj/MatMul_output_0', 'onnx::MatMul_1408', '/wav2vec2/encoder/layers.5/attention/k_proj/MatMul_output_0', 'onnx::MatMul_1409', '/wav2vec2/encoder/layers.5/attention/v_proj/MatMul_output_0', 'onnx::MatMul_1411', '/wav2vec2/encoder/layers.5/attention/out_proj/MatMul_output_0', 'onnx::MatMul_1412', '/wav2vec2/encoder/layers.5/feed_forward/intermediate_dense/MatMul_output_0', 'onnx::MatMul_1413', '/wav2vec2/encoder/layers.5/feed_forward/output_dense/MatMul_output_0', 'onnx::MatMul_1414', '/wav2vec2/encoder/layers.6/attention/q_proj/MatMul_output_0', 'onnx::MatMul_1421', '/wav2vec2/encoder/layers.6/attention/k_proj/MatMul_output_0', 'onnx::MatMul_1422', '/wav2vec2/encoder/layers.6/attention/v_proj/MatMul_output_0', 'onnx::MatMul_1424', '/wav2vec2/encoder/layers.6/attention/out_proj/MatMul_output_0', 'onnx::MatMul_1425', '/wav2vec2/encoder/layers.6/feed_forward/intermediate_dense/MatMul_output_0', 'onnx::MatMul_1426', '/wav2vec2/encoder/layers.6/feed_forward/output_dense/MatMul_output_0'] + diff --git a/examples/audio/configs/whisper-large-v3-turbo.yaml b/examples/audio/configs/whisper-large-v3-turbo.yaml new file mode 100644 index 000000000..7080f2f58 --- /dev/null +++ b/examples/audio/configs/whisper-large-v3-turbo.yaml @@ -0,0 +1 @@ +FP16NodeInstanceNames: ['onnx::MatMul_5673', 'logits'] diff --git a/examples/image_text_to_text/models/qwen_vl/configs/Qwen2.5-VL-32B-Instruct-AWQ.yaml b/examples/image_text_to_text/models/qwen_vl/configs/Qwen2.5-VL-32B-Instruct-AWQ.yaml new file mode 100644 index 000000000..458d7e96a --- /dev/null +++ b/examples/image_text_to_text/models/qwen_vl/configs/Qwen2.5-VL-32B-Instruct-AWQ.yaml @@ -0,0 +1,3 @@ +FP16NodeInstanceNames: + - logits + - onnx::MatMul_25676 \ No newline at end of file diff --git a/examples/text_generation/configs/Qwen3-30B-A3B-Instruct-2507.yaml b/examples/text_generation/configs/Qwen3-30B-A3B-Instruct-2507.yaml new file mode 100644 index 000000000..466b774b5 --- /dev/null +++ b/examples/text_generation/configs/Qwen3-30B-A3B-Instruct-2507.yaml @@ -0,0 +1 @@ +FP16NodeInstanceNames: ['onnx::MatMul_40141', '/lm_head/MatMul_output_0']