diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh
index 8f6f7b4800b..e16ab09b87e 100644
--- a/.azure-pipelines/scripts/install_nc.sh
+++ b/.azure-pipelines/scripts/install_nc.sh
@@ -9,8 +9,8 @@ if [[ $1 = *"3x_pt"* ]]; then
     python setup.py pt bdist_wheel
 else
     echo -e "\n Install torch CPU ... "
-    pip install torch==2.8.0 torchvision --index-url https://download.pytorch.org/whl/cpu
-    python -m pip install intel-extension-for-pytorch==2.8.0 oneccl_bind_pt --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+    pip install torch==2.9.0 torchvision --index-url https://download.pytorch.org/whl/cpu
+    # python -m pip install intel-extension-for-pytorch==2.8.0 oneccl_bind_pt --index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
     python -m pip install --no-cache-dir -r requirements.txt
     python setup.py bdist_wheel
 fi
diff --git a/test/3x/torch/quantization/test_autoround.py b/test/3x/torch/quantization/test_autoround.py
index 1a40c0442ab..df0dc08eb4a 100644
--- a/test/3x/torch/quantization/test_autoround.py
+++ b/test/3x/torch/quantization/test_autoround.py
@@ -173,7 +173,7 @@ def test_conv1d(self):
         tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-GPT-124M", trust_remote_code=True)
         text = "Replace me by any text you'd like."
         encoded_input = tokenizer(text, return_tensors="pt")
-        quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=0,tokenizer=tokenizer,export_format="auto_round")
+        quant_config = AutoRoundConfig(nsamples=32, seqlen=10, iters=0, amp=False, tokenizer=tokenizer, export_format="auto_round")
         model = prepare(model=model, quant_config=quant_config)
         q_model = convert(model)
         output = tokenizer.decode(q_model.generate(**encoded_input, max_new_tokens=10)[0])
@@ -267,6 +267,7 @@ def test_mllm(self):
     #     q_model.save(output_dir="saved_results_tiny-random-GPTJForCausalLM", format="huggingface")
     #     loaded_model = load("saved_results_tiny-random-GPTJForCausalLM", format="huggingface", trust_remote_code=True)
 
+    @pytest.mark.skipif(not Version(torch.__version__) < Version("2.9.0"), reason="only for torch<2.9.0 [ipex]")
     def test_set_local(self):
         fp32_model = AutoModelForCausalLM.from_pretrained(
             "facebook/opt-125m",
@@ -324,6 +325,10 @@ def test_set_local(self):
     @pytest.mark.skipif(not ct_installed, reason="The compressed-tensors module is not installed.")
     @pytest.mark.parametrize("scheme", ["W4A16","W2A16","W3A16","W8A16","MXFP4","MXFP8", "NVFP4","FPW8A16","FP8_STATIC"])
     def test_scheme(self, scheme):
+        # Skip W4A16 scheme when torch version >= 2.9.0
+        if scheme == "W4A16" and not Version(torch.__version__) < Version("2.9.0"):
+            pytest.skip("W4A16 scheme is not supported for torch >= 2.9.0 on cpu")
+
         # INC API
         fp32_model = AutoModelForCausalLM.from_pretrained(
             "facebook/opt-125m",
diff --git a/test/3x/torch/quantization/test_smooth_quant.py b/test/3x/torch/quantization/test_smooth_quant.py
index 147c1adfe1d..ea8c15ddfbe 100644
--- a/test/3x/torch/quantization/test_smooth_quant.py
+++ b/test/3x/torch/quantization/test_smooth_quant.py
@@ -4,6 +4,7 @@
 
 import pytest
 import torch
+from packaging.version import Version
 
 from neural_compressor.torch.quantization import SmoothQuantConfig, convert, get_default_sq_config, prepare, quantize
 from neural_compressor.torch.utils import is_ipex_available
@@ -33,8 +34,7 @@ def forward(self, x):
 def run_fn(model):
     for i in range(10):
         model(example_inputs)
-
-
+@pytest.mark.skipif(not Version(torch.__version__) < Version("2.9.0"), reason="only for torch<2.9.0 [ipex]")
 class TestSmoothQuant:
     def teardown_class(self):
         shutil.rmtree("saved_results", ignore_errors=True)
diff --git a/test/3x/torch/quantization/test_static_quant.py b/test/3x/torch/quantization/test_static_quant.py
index a03b2939425..29f506040af 100644
--- a/test/3x/torch/quantization/test_static_quant.py
+++ b/test/3x/torch/quantization/test_static_quant.py
@@ -4,6 +4,7 @@
 
 import pytest
 import torch
+from packaging.version import Version
 
 try:
     import intel_extension_for_pytorch as ipex
@@ -52,7 +53,7 @@ def run_fn(model):
     model(torch.rand((1, 30)))
     model(torch.rand((1, 30)))
 
-
+@pytest.mark.skipif(not Version(torch.__version__) < Version("2.9.0"), reason="only for torch<2.9.0 [ipex]")
 class TestStaticQuant:
     def setup_class(self):
         self.fp32_model = build_simple_torch_model()
diff --git a/test/3x/torch/quantization/weight_only/test_transformers.py b/test/3x/torch/quantization/weight_only/test_transformers.py
index 6310fdafcef..6c291f81e35 100644
--- a/test/3x/torch/quantization/weight_only/test_transformers.py
+++ b/test/3x/torch/quantization/weight_only/test_transformers.py
@@ -23,6 +23,7 @@
 
 ipex_version = get_ipex_version()
 
+
 try:
     import auto_round
 
@@ -30,6 +31,7 @@
 except ImportError:
     auto_round_installed = False
 
+@pytest.mark.skipif(not Version(torch.__version__) < Version("2.9.0"), reason="only for torch<2.9.0 [ipex]")
 class TestTansformersLikeAPI:
     def setup_class(self):
         self.model_name_or_path = "hf-tiny-model-private/tiny-random-GPTJForCausalLM"