Support transposed input for data-aware Weights Compression#3296
Support transposed input for data-aware Weights Compression #3296 — rk119 wants to merge 40 commits into openvinotoolkit:develop from
Conversation
|
@rk119, do you have any update? |
Hi @alexsu52, apologies for my late response — I was traveling. I noticed that weights-compression support for the ONNX backend is actively being implemented. @ljaljushkin, please clarify whether I am required to add this in the current PR. At the moment, my changes won't make a noticeable difference for ONNX, since data-aware weights compression and algorithms like |
Thanks for the contribution! @andrey-churkin, please cover review for onnx backend. |
…on.py Co-authored-by: Lyalyushkin Nikolay <nikolay.lyalyushkin@intel.com>
Co-authored-by: Lyalyushkin Nikolay <nikolay.lyalyushkin@intel.com>
Co-authored-by: Lyalyushkin Nikolay <nikolay.lyalyushkin@intel.com>
| def test_compression_with_transpose(transpose_a, transpose_b, raises_error, kwargs): | ||
| dataset_size = 4 | ||
| model = LMLinearModel(transpose_a=True, transpose_b=False).ov_model | ||
| model = LMLinearModel(transpose_a=transpose_a, transpose_b=transpose_b).ov_model |
There was a problem hiding this comment.
@rk119 could you please add AWQModel to this test case.
There was a problem hiding this comment.
Sure, just to confirm, should a model like the following be added in tests/openvino/native/quantization/test_weights_compression.py:
class AWQModel(OVReferenceModel):
    """Reference OV model with a single MatMul node whose ``transpose_a`` /
    ``transpose_b`` attributes are configurable, for weights-compression tests.
    """

    OUTPUT_DIM = 32
    HIDDEN_DIM = 16
    INPUT_SHAPE = [1, 24, HIDDEN_DIM]  # [B, SeqLen, HiddenDim]

    def _create_ov_model(
        self,
        transpose_b: bool = True,
        transpose_a: bool = False,
        input_shape: Optional[list[int]] = None,
        is_int8=False,
    ):
        """Build ``Input -> MatMul(weights) -> Result``.

        When ``transpose_a`` is set, the hidden dimension of the activation
        is the second-to-last axis instead of the last one.
        """
        self._input_shape = input_shape if input_shape is not None else self.INPUT_SHAPE
        self._hidden_dim = self._input_shape[-2 if transpose_a else -1]

        activation = opset.parameter(self._input_shape, name="Input")

        raw_weights = self._rng.random(self.get_weight_shape(transpose_b)).astype(np.float32)
        weights = AWQMatmulModel.get_weights(raw_weights, is_int8=is_int8, name="weights_1")

        matmul = opset.matmul(
            activation, weights, transpose_a=transpose_a, transpose_b=transpose_b, name="MatMul"
        )
        result = opset.result(matmul, name="Result")
        result.get_output_tensor(0).set_names({"Result"})
        return ov.Model([result], [activation])

    @property
    def hidden_dim(self):
        # Hidden dimension resolved from the input shape in _create_ov_model.
        return self._hidden_dim

    def get_weight_shape(self, transpose_b: bool = True):
        """Weight shape matching the MatMul's ``transpose_b`` attribute."""
        if transpose_b:
            return [self.OUTPUT_DIM, self.hidden_dim]
        return [self.hidden_dim, self.OUTPUT_DIM]
and the test to be updated like this:
@pytest.mark.parametrize(
    "model",
    [LMLinearModel, AWQModel],
    ids=["lm_linear", "awq_model"],
)
@pytest.mark.parametrize(
    ("transpose_a", "transpose_b", "raises_error"),
    [
        (False, True, False),
        (True, True, False),
        (False, False, True),
        (True, False, True),
    ],
    ids=["tb_nota", "ta_tb", "nota_notb", "ta_notb"],
)
@pytest.mark.parametrize(
    "kwargs",
    [
        dict(scale_estimation=True),
        dict(lora_correction=True),
        dict(
            gptq=True,
            awq=True,
            scale_estimation=True,
            advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)),
        ),
    ],
    ids=["se", "lora", "gptq_se_awq"],
)
def test_compression_with_transpose(model, transpose_a, transpose_b, raises_error, kwargs):
    """Data-aware compression of a MatMul with every transpose_a/transpose_b combo.

    Unsupported configurations must raise UnsupportedModelError; lora_correction
    is exempt from the failure expectation.
    """
    dataset_size = 4
    ov_model = model(transpose_a=transpose_a, transpose_b=transpose_b).ov_model
    calibration_samples = [np.ones(inp.shape) for inp in ov_model.inputs] * dataset_size

    expect_failure = raises_error and not kwargs.get("lora_correction", False)
    expectation = pytest.raises(nncf.UnsupportedModelError) if expect_failure else nullcontext()

    with expectation:
        compress_weights(
            ov_model,
            mode=CompressWeightsMode.INT4_SYM,
            ratio=1.0,
            group_size=8,
            subset_size=2,
            dataset=Dataset(calibration_samples),
            all_layers=True,
            **kwargs,
        )
|
@daniil-lyakhov should it be closed since you're working on many related PRs? |
Changes
Added support for transposed activations in the
matmul operation in Weights Compression. While collecting statistics, the reduction axes are set according to whether the input is transposed. If the input is transposed, the second-to-last dimension is the hidden dimension; otherwise the last dimension is.
Pass the input transpose value to the
matmul operation in insert_adapters for the OV backend to match the inner dimensions. Implemented a common backend function
get_activation_channel_axis to support transposed input (transpose_a=True). Closes Issue
#3230