diff --git a/auto_round/compressors/utils.py b/auto_round/compressors/utils.py
index db95d9547..bb1bb9cf1 100644
--- a/auto_round/compressors/utils.py
+++ b/auto_round/compressors/utils.py
@@ -866,8 +866,33 @@ def get_fp_layer_names(model: torch.nn.Module, ignore_layers: str):
     """
     from auto_round.utils import SUPPORTED_LAYER_TYPES
 
-    if not ignore_layers:
-        return []
+    not_to_quantized_layers = []
+
+    # Automatically keep FP8 linear layers out of quantization
+    for n, m in model.named_modules():
+        if is_fp8_linear(m):
+            not_to_quantized_layers.append(n)
+            logger.trace(f"Auto-detected FP8 layer to ignore: {n}")
+
+    if ignore_layers:
+        ignore_list = ignore_layers.replace(" ", "").split(",")
+        for fp_layer in ignore_list:
+            if not fp_layer:
+                continue
+            for n, _ in model.named_modules():
+                match_name = fp_layer
+                # Require a trailing dot after a numeric suffix so that,
+                # e.g., "layers.1" does not also match "layers.11"
+                if fp_layer[-1].isdigit():
+                    match_name += "."
+                if match_name in n:
+                    if n not in not_to_quantized_layers:
+                        not_to_quantized_layers.append(n)
+                        logger.trace(f"User-specified ignore layer matched: {n}")
+
+    logger.trace(f"not_to_quantized_layers: {not_to_quantized_layers}")
+    return not_to_quantized_layers
+
     ignore_layers = ignore_layers.replace(" ", "").split(",")
     all_layer_names = []
     for n, m in model.named_modules():
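
For reviewers who want to try the auto-detection pass outside auto_round: `is_fp8_linear` is the project's own helper and its implementation is not shown in this diff. The sketch below is a hedged stand-in that assumes an FP8 linear can be recognized by its weight dtype; the real helper (which likely also handles dedicated FP8 linear classes) may differ, and `looks_like_fp8_linear` is a made-up name.

```python
import torch

def looks_like_fp8_linear(module: torch.nn.Module) -> bool:
    """Heuristic stand-in: an nn.Linear whose weight is stored in an FP8 dtype."""
    fp8_dtypes = (torch.float8_e4m3fn, torch.float8_e5m2)  # FP8 dtypes in PyTorch >= 2.1
    weight = getattr(module, "weight", None)
    return (
        isinstance(module, torch.nn.Linear)
        and weight is not None
        and weight.dtype in fp8_dtypes
    )

m = torch.nn.Linear(4, 4)
print(looks_like_fp8_linear(m))  # False: plain float32 weights
m.weight.data = m.weight.data.to(torch.float8_e4m3fn)
print(looks_like_fp8_linear(m))  # True: weights now stored as FP8
```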
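
The trailing-dot rule in the user-specified branch is easy to misread, so here is a minimal, self-contained reproduction of just that matching logic. `TinyModel`, `Block`, and `matched_names` are illustrative names, not part of auto_round.

```python
import torch

class Block(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(4, 4)

class TinyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Produces module names "layers.0" ... "layers.11" plus their ".proj" children
        self.layers = torch.nn.ModuleList([Block() for _ in range(12)])

def matched_names(model: torch.nn.Module, fp_layer: str) -> list[str]:
    """Mirror the substring rule from the diff for a single ignore entry."""
    hits = []
    for n, _ in model.named_modules():
        match_name = fp_layer
        if fp_layer[-1].isdigit():
            match_name += "."  # stops "layers.1" from also matching "layers.11"
        if match_name in n:
            hits.append(n)
    return hits

model = TinyModel()
print(matched_names(model, "layers.1"))   # ['layers.1.proj'], no 'layers.11.*' hits
print(matched_names(model, "layers.11"))  # ['layers.11.proj']
```

Note one consequence of the rule: the module's own name (`layers.1`) contains no trailing dot, so an entry ending in a digit matches only the submodules under that name, not the named module itself.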