diff --git a/scripts/float16.py b/scripts/float16.py index 0be170f33..647c5df0e 100644 --- a/scripts/float16.py +++ b/scripts/float16.py @@ -135,13 +135,13 @@ def convert_tensor_float_to_float16(tensor, min_positive_val=1e-7, max_finite_va # convert raw_data (bytes type) if tensor.raw_data: # convert n.raw_data to float - float32_list = np.fromstring(tensor.raw_data, dtype="float32") + float32_list = np.frombuffer(tensor.raw_data, dtype="float32") # convert float to float16 float16_list = convert_np_to_float16( float32_list, min_positive_val, max_finite_val ) # convert float16 to bytes and write back to raw_data - tensor.raw_data = float16_list.tostring() + tensor.raw_data = float16_list.tobytes() return tensor diff --git a/scripts/quantize.py b/scripts/quantize.py index 3f73f0916..42e92dd26 100644 --- a/scripts/quantize.py +++ b/scripts/quantize.py @@ -12,7 +12,7 @@ from onnxruntime.quantization import QuantType, QuantizationMode from onnxruntime.quantization.onnx_quantizer import ONNXQuantizer from onnxruntime.quantization.registry import IntegerOpsRegistry -from onnxruntime.quantization.matmul_4bits_quantizer import MatMul4BitsQuantizer +from onnxruntime.quantization.matmul_nbits_quantizer import MatMulNBitsQuantizer from onnxruntime.quantization.matmul_bnb4_quantizer import MatMulBnb4Quantizer from . 
import float16 @@ -234,7 +234,8 @@ def quantize_q4( Quantize the weights of the model from float32 to 4-bit int """ - quantizer = MatMul4BitsQuantizer( + # The default algo_config performs 4-bit quantization + quantizer = MatMulNBitsQuantizer( model=model, block_size=block_size, is_symmetric=is_symmetric, diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 6d18662f4..9adb65b35 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,7 +1,4 @@ -transformers[torch]==4.49.0 -onnxruntime==1.20.1 -optimum@git+https://github.com/huggingface/optimum.git@b04feaea78cda58d79b8da67dca3fd0c4ab33435 -onnx==1.17.0 -tqdm==4.67.1 -onnxslim==0.1.48 -numpy==2.2.6 +optimum-onnx==0.0.3 +onnxruntime==1.23.2 +onnxslim==0.1.78 +accelerate==1.12.0 \ No newline at end of file