|
13 | 13 | import logging |
14 | 14 | import operator |
15 | 15 | from dataclasses import dataclass, replace |
16 | | -from typing import Callable, cast, Iterable, List, Optional, Sequence |
| 16 | +from typing import Any, Callable, cast, Iterable, List, NamedTuple, Optional, Sequence |
17 | 17 |
|
18 | 18 | import torch |
19 | 19 | import torch.fx |
20 | 20 | from executorch.backends.arm.common.debug import get_node_debug_info |
21 | 21 | from executorch.backends.arm.common.type import ensure_type |
22 | 22 | from executorch.backends.arm.quantizer import QuantizationConfig |
23 | | -from torch._subclasses import FakeTensor |
24 | 23 |
|
| 24 | +from torch._subclasses import FakeTensor |
25 | 25 | from torch.fx import Node |
26 | 26 | from torchao.quantization.pt2e import ( |
27 | 27 | FakeQuantize, |
28 | 28 | FusedMovingAvgObsFakeQuantize, |
29 | 29 | MovingAveragePerChannelMinMaxObserver, |
30 | 30 | PartialWrapper, |
31 | 31 | ) |
| 32 | + |
32 | 33 | from torchao.quantization.pt2e.quantizer import ( |
33 | 34 | annotate_input_qspec_map, |
34 | 35 | annotate_output_qspec, |
| 36 | + FixedQParamsQuantizationSpec, |
35 | 37 | QuantizationSpec, |
36 | 38 | QuantizationSpecBase, |
37 | 39 | SharedQuantizationSpec, |
@@ -78,6 +80,11 @@ def __init__(self): |
78 | 80 | self.quant_output: Optional[_QuantProperty] = None |
79 | 81 |
|
80 | 82 |
|
| 83 | +class _QParams(NamedTuple): |
| 84 | + scale: float |
| 85 | + zero_point: int |
| 86 | + |
| 87 | + |
81 | 88 | def _as_list(x): |
82 | 89 | """Return ``x`` wrapped as a list if needed. |
83 | 90 |
|
@@ -443,6 +450,29 @@ def _match_pattern( |
443 | 450 | torch.ops.aten.conv3d.padding, |
444 | 451 | } |
445 | 452 |
|
# For these ops, we use fixed qspecs, meaning that quantization params for
# these are statically defined. This is to prevent issues with out-of-range
# values when using dynamic quantization.
#
# Maps an operator to a table of fixed input qparams for that operator,
# keyed by bit width of the activation dtype (currently 8 or 16 — looking
# up any other width raises KeyError at annotation time).
_fixed_input_qspec_ops: dict[Any, dict[int, _QParams]] = {
    # acos is only defined for inputs in the domain [-1, 1]
    torch.ops.aten.acos.default: {
        8: _QParams((1.0 - (-1.0)) / (1 << 8), 0),
        16: _QParams((1.0 - (-1.0)) / (1 << 16), 0),
    },
    # asin is only defined for inputs in the domain [-1, 1]
    torch.ops.aten.asin.default: {
        8: _QParams((1.0 - (-1.0)) / (1 << 8), 0),
        16: _QParams((1.0 - (-1.0)) / (1 << 16), 0),
    },
    # atanh is only defined on the open interval (-1, 1); the endpoints are
    # pulled slightly inward so quantized values stay strictly inside it.
    torch.ops.aten.atanh.default: {
        8: _QParams((0.999 - (-0.999)) / (1 << 8), 0),
        16: _QParams((0.99999 - (-0.99999)) / (1 << 16), 0),
    },
}
446 | 476 | _one_to_one = { |
447 | 477 | torch.ops.aten.abs.default, |
448 | 478 | torch.ops.aten.ceil.default, |
@@ -474,11 +504,8 @@ def _match_pattern( |
474 | 504 | torch.ops.aten.log1p.default, |
475 | 505 | torch.ops.aten.acosh.default, |
476 | 506 | torch.ops.aten.sign.default, |
477 | | - torch.ops.aten.asin.default, |
478 | | - torch.ops.aten.atanh.default, |
479 | 507 | torch.ops.aten.asinh.default, |
480 | 508 | torch.ops.aten.cosh.default, |
481 | | - torch.ops.aten.acos.default, |
482 | 509 | torch.ops.aten.cumsum.default, |
483 | 510 | torch.ops.aten.tan.default, |
484 | 511 | } |
@@ -784,6 +811,25 @@ def any_or_hardtanh_min_zero(n: Node): |
784 | 811 | elif node.target in _one_to_one: |
785 | 812 | quant_properties.quant_inputs = [_QuantProperty(0, input_act_qspec)] |
786 | 813 | quant_properties.quant_output = _QuantProperty(0, output_act_qspec) |
| 814 | + elif node.target in _fixed_input_qspec_ops: |
| 815 | + num_bits = torch.iinfo(input_act_qspec.dtype).bits |
| 816 | + qparams = _fixed_input_qspec_ops[node.target][num_bits] |
| 817 | + |
| 818 | + quant_properties.quant_inputs = [ |
| 819 | + _QuantProperty( |
| 820 | + 0, |
| 821 | + FixedQParamsQuantizationSpec( |
| 822 | + dtype=input_act_qspec.dtype, |
| 823 | + scale=qparams.scale, |
| 824 | + zero_point=qparams.zero_point, |
| 825 | + quant_min=input_act_qspec.quant_min, |
| 826 | + quant_max=input_act_qspec.quant_max, |
| 827 | + qscheme=input_act_qspec.qscheme, |
| 828 | + is_dynamic=input_act_qspec.is_dynamic, |
| 829 | + ), |
| 830 | + ) |
| 831 | + ] |
| 832 | + quant_properties.quant_output = _QuantProperty(0, output_act_qspec) |
787 | 833 | elif node.target in _one_to_one_shared_input_qspec: |
788 | 834 | input_node = ensure_type(Node, node.args[0]) |
789 | 835 | quant_properties.quant_inputs = [_QuantProperty(0, input_act_qspec)] |
|
0 commit comments