
Commit 1cb85ef

Authored by martinlsm (Martin Lindström) and oscarandersson8218
Arm backend: Test partial quantization of models (#16249)
Run tests of partial quantization on MobileNetV2 and Llama using the INT+FP profile. The tests quantize only some layer types and run the rest in floating point.

cc @freddan80 @per @zingo @oscarandersson8218 @digantdesai

Signed-off-by: Martin Lindström <Martin.Lindstroem@arm.com>
Co-authored-by: Martin Lindström <Martin.Lindstroem@arm.com>
Co-authored-by: Oscar Andersson <87121123+oscarandersson8218@users.noreply.github.com>
1 parent 0ac5213 commit 1cb85ef
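
Both files add the same pattern, which is worth sketching on its own: drop the quantizer's global config so nothing is quantized by default, then opt individual module types back in. A minimal sketch, assuming a pipeline object that exposes the same quantizer as the tests below — the helper name quantize_only and the pipeline variable are illustrative, not part of the commit, while the API calls themselves (set_global, set_module_type, get_symmetric_quantization_config) are exactly the ones the diffs use:

import torch
from executorch.backends.arm.quantizer import get_symmetric_quantization_config

def quantize_only(pipeline, *module_types):
    """Quantize only the given module types; all other layers stay in FP."""
    # With no global config, layers are untouched unless explicitly opted in.
    pipeline.quantizer.set_global(None)
    cfg = get_symmetric_quantization_config()
    for module_type in module_types:
        pipeline.quantizer.set_module_type(module_type, cfg)

# e.g. quantize_only(pipeline, torch.nn.Linear) matches the Llama test below;
# quantize_only(pipeline, torch.nn.Conv2d, torch.nn.ReLU6) matches the MV2 test.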

File tree (2 files changed: +50, −0 lines)

backends/arm/test/models/test_llama.py
backends/arm/test/models/test_mobilenet_v2_arm.py

backends/arm/test/models/test_llama.py

Lines changed: 27 additions & 0 deletions
@@ -16,6 +16,7 @@
 import pytest
 import torch
 from executorch.backends.arm._passes import InsertInt32CastsAfterInt64PlaceholdersPass
+from executorch.backends.arm.quantizer import get_symmetric_quantization_config

 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.test_pipeline import (
@@ -99,6 +100,14 @@ def prepare_model(self):
         return llama_model, llama_inputs, llama_meta


+def _use_partial_quantizer(pipeline):
+    """Set the pipeline's quantizer to only include Linear layers"""
+    pipeline.quantizer.set_global(None)
+    pipeline.quantizer.set_module_type(
+        torch.nn.Linear, get_symmetric_quantization_config()
+    )
+
+
 def test_llama_tosa_FP():
     llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()

@@ -179,3 +188,21 @@ def test_llama_vgf_quant():
         quantize=True,
     )
     pipeline.run()
+
+
+def test_llama_partial_quant_tosa_INT_FP():
+    llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()
+
+    if llama_model is None or llama_inputs is None:
+        pytest.skip("Missing model and/or input files")
+
+    with torch.no_grad():
+        pipeline = TosaPipelineINT[input_t](
+            llama_model,
+            llama_inputs,
+            aten_op=[],
+            exir_op=[],
+            tosa_extensions=["FP"],
+        )
+        _use_partial_quantizer(pipeline)
+        pipeline.run()

backends/arm/test/models/test_mobilenet_v2_arm.py

Lines changed: 23 additions & 0 deletions
@@ -10,6 +10,7 @@
 import pytest

 import torch
+from executorch.backends.arm.quantizer import get_symmetric_quantization_config
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
@@ -39,6 +40,14 @@
 }


+def _use_partial_quantizer(pipeline):
+    """Set the pipeline's quantizer to only include Conv2d and ReLU6"""
+    quant_cfg = get_symmetric_quantization_config()
+    pipeline.quantizer.set_global(None)
+    pipeline.quantizer.set_module_type(torch.nn.Conv2d, quant_cfg)
+    pipeline.quantizer.set_module_type(torch.nn.ReLU6, quant_cfg)
+
+
 def test_mv2_tosa_FP():
     pipeline = TosaPipelineFP[input_t](
         mv2, model_inputs, aten_op=[], exir_op=[], use_to_edge_transform_and_lower=True
@@ -140,3 +149,17 @@ def test_mv2_vgf_no_quant():
         quantize=False,
     )
     pipeline.run()
+
+
+def test_mv2_partial_quant_tosa_INT_FP():
+    pipeline = TosaPipelineINT[input_t](
+        mv2,
+        model_inputs,
+        aten_op=[],
+        exir_op=[],
+        tosa_extensions=["FP"],
+        use_to_edge_transform_and_lower=True,
+        atol=0.20,
+    )
+    _use_partial_quantizer(pipeline)
+    pipeline.run()
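
To exercise just the new tests, pytest's -k keyword filter matches both via the partial_quant substring in their names; a small sketch using pytest's Python entry point (the selection expression is an assumption based on the test names above, not something the commit ships):

import pytest

# Run only the partial-quantization tests added in this commit.
pytest.main([
    "backends/arm/test/models/test_llama.py",
    "backends/arm/test/models/test_mobilenet_v2_arm.py",
    "-k", "partial_quant",
])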
