Empty file modified .pre-commit-config.yaml
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion auto_round/__init__.py
100644 → 100755
@@ -14,7 +14,7 @@
from auto_round.autoround import AutoRound

# support for old api
from auto_round.autoround import AutoRoundLLM, AutoRoundMLLM, AutoRoundAdam, AutoRoundDiffusion
from auto_round.autoround import AutoRoundLLM, AutoRoundMLLM, AutoRoundDiffusion
from auto_round.schemes import QuantizationScheme
from auto_round.auto_scheme import AutoScheme
from auto_round.utils import LazyImport
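The hunk above only changes the old-api re-export surface: `AutoRoundAdam` is dropped from `auto_round/__init__.py`. A minimal sketch of what remains importable after this change, inferred from the diff rather than verified against an installed build:

```python
# Old-api names still re-exported by auto_round/__init__.py after this change.
from auto_round import AutoRound, AutoRoundLLM, AutoRoundMLLM, AutoRoundDiffusion
from auto_round import QuantizationScheme, AutoScheme

# The deprecated Adam wrapper is no longer re-exported, so code that relied on
# it would now fail at import time:
# from auto_round import AutoRoundAdam  # ImportError after this PR
```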
107 changes: 0 additions & 107 deletions auto_round/autoround.py
100644 → 100755
@@ -18,7 +18,6 @@
import torch

from auto_round.compressors import (
AdamCompressor,
BaseCompressor,
DiffusionCompressor,
ExtraConfig,
@@ -173,8 +172,6 @@ def __new__(
extra_config.diffusion_config = None
model_cls.append(LLMCompressor)

if enable_adam:
model_cls.append(AdamCompressor)
dynamic_compressor = type("AutoRound", tuple(model_cls), {})
if extra_config:
kwargs.update(extra_config.to_dict())
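For reviewers unfamiliar with the idiom: `type("AutoRound", tuple(model_cls), {})` assembles the final compressor class at runtime from whichever mixin classes were appended to `model_cls`, and this hunk simply removes the Adam branch from that list. A standalone sketch of the same pattern, using illustrative stand-in classes rather than the project's real compressors:

```python
class LLMCompressor:
    def compress(self) -> str:
        return "llm path"

class DiffusionCompressor:
    def compress(self) -> str:
        return "diffusion path"

def build_autoround(enable_diffusion: bool) -> type:
    # Collect the mixins that should make up the final class.
    model_cls = [DiffusionCompressor] if enable_diffusion else [LLMCompressor]
    # type(name, bases, namespace) creates a new class on the fly, mirroring
    # `type("AutoRound", tuple(model_cls), {})` in autoround.py.
    return type("AutoRound", tuple(model_cls), {})

AutoRound = build_autoround(enable_diffusion=False)
print(AutoRound().compress())  # -> "llm path"
```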
@@ -371,110 +368,6 @@ def __init__(
)


@deprecated("AutoRound")
class AutoRoundAdam(AdamCompressor):
"""Class for quantization with optimizers like adamw of a PyTorch model.

Args:
model: The PyTorch model to be quantized.
tokenizer: An optional tokenizer for processing input data.
platform (str): The platform to load the pretrained model from, options: ["hf", "model_scope"]
scheme (str| dict | QuantizationScheme ): A preset scheme that defines the quantization configurations
bits (int): Number of bits for quantization (default is 4).
group_size (int): Size of the quantization group (default is 128).
sym (bool): Whether symmetric quantization is used (default is True).
layer_config (dict): Configuration for weight quantization (default is None).
batch_size (int): Batch size for training (default is 8).
amp (bool): Whether to use automatic mixed precision (default is True).
device: The device to be used for training (default is "auto").
lr_scheduler: The learning rate scheduler to be used.
dataset: The default dataset name (default is "NeelNanda/pile-10k").
enable_quanted_input (bool): Whether to use quantized input data (default is True).
enable_minmax_tuning (bool): Whether to enable min-max tuning (default is True).
lr (float): The learning rate (default is 0.005).
minmax_lr (float): The learning rate for min-max tuning (default is None).
low_gpu_mem_usage (bool): Whether to use low GPU memory (default is False).
iters (int): Number of iterations (default is 200).
seqlen (int): Length of the sequence.
nsamples (int): Number of samples (default is 128).
sampler (str): The sampling method (default is "rand").
seed (int): The random seed (default is 42).
nblocks (int): Number of blocks (default is 1).
gradient_accumulate_steps (int): Number of gradient accumulation steps (default is 1).
not_use_best_mse (bool): Whether to skip using the iteration with the best mean squared error (default is False).
dynamic_max_gap (int): The dynamic maximum gap (default is -1).
data_type (str): The data type to be used (default is "int").
scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
have different choices.
act_bits (int): Number of bits for activation quantization. Default is 16.
act_group_size (int): Group size for activation quantization. Default is None.
act_sym (bool): Whether to use symmetric activation quantization. Default is None.
act_data_type (str): Specifies the data type for activations.
Defaults to None, in which case it inherits the weight data type.
act_dynamic (bool): Whether to use dynamic activation quantization. Default is True.
to_quant_block_names (str|list): A string or list whose elements are lists of
block layer names to be quantized.
enable_norm_bias_tuning (bool): Whether to enable fast norm/layer_bias tuning
enable_torch_compile (bool): Whether to enable torch compile to optimize quant_block/layer function
**kwargs: Additional keyword arguments.

Returns:
The quantized model.
"""

bits: int | None
group_size: int | None
sym: bool | None
data_type: str | None
act_bits: int | None
act_group_size: int | None
act_sym: bool | None
act_data_type: str | None
act_dynamic: bool | None
super_bits: int | None
super_group_size: int | None

def __init__(
self,
model: Union[torch.nn.Module, str],
tokenizer=None,
platform: str = "hf",
scheme: Union[str, dict, QuantizationScheme] = "W4A16",
layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None,
dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "NeelNanda/pile-10k",
iters: int = 200,
seqlen: int = 2048,
nsamples: int = 128,
batch_size: int = 8,
gradient_accumulate_steps: int = 1,
low_gpu_mem_usage: bool = False,
device_map: Union[str, int, torch.device, dict] = 0,
enable_torch_compile: bool = False,
seed: int = 42,
optimizer="AdamW",
**kwargs,
):
super().__init__(
model=model,
tokenizer=tokenizer,
platform=platform,
scheme=scheme,
layer_config=layer_config,
batch_size=batch_size,
dataset=dataset,
low_gpu_mem_usage=low_gpu_mem_usage,
iters=iters,
seqlen=seqlen,
nsamples=nsamples,
seed=seed,
gradient_accumulate_steps=gradient_accumulate_steps,
enable_torch_compile=enable_torch_compile,
device_map=device_map,
optimizer=optimizer,
**kwargs,
)


@deprecated("AutoRound")
class AutoRoundMLLM(MLLMCompressor):
"""Class for automatic rounding-based quantization with MLLMs.
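For context, the deleted `AutoRoundAdam` class shown above was the deprecated Adam-specific entry point. A hypothetical example of the call pattern this PR removes, reconstructed from the deleted `__init__` signature; the model id and tuning values are illustrative only:

```python
from auto_round.autoround import AutoRoundAdam  # removed by this PR

# Keyword arguments mirror the deleted __init__ signature; values are examples.
ar = AutoRoundAdam(
    model="facebook/opt-125m",   # hypothetical model id
    scheme="W4A16",
    dataset="NeelNanda/pile-10k",
    iters=200,
    seqlen=2048,
    nsamples=128,
    batch_size=8,
    optimizer="AdamW",
)
```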
1 change: 0 additions & 1 deletion auto_round/compressors/__init__.py
100644 → 100755
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from auto_round.compressors.adam import AdamCompressor
from auto_round.compressors.base import BaseCompressor
from auto_round.compressors.base import LLMCompressor
from auto_round.compressors.mllm.compressor import MLLMCompressor
164 changes: 0 additions & 164 deletions auto_round/compressors/adam.py

This file was deleted.
