@@ -300,7 +300,8 @@ def save_quantized_model(
     save_format: str = "gguf",
     save_tokenizer: bool = True,
     quant_config: Optional[Dict[str, Any]] = None,
-    safe_serialization: bool = True
+    safe_serialization: bool = True,
+    verbose: bool = False
 ):
     """
     Save a quantized model in either GGUF or safetensors format.
@@ -312,52 +313,49 @@ def save_quantized_model(
         save_tokenizer: Whether to save the tokenizer
         quant_config: Optional quantization configuration
         safe_serialization: Whether to use safe serialization for safetensors format
+        verbose: Whether to show detailed progress logs
     """
     try:
-        logger.log_info("\n" + "=" * 80)
-        logger.log_info(f"Starting Model Export Process ({save_format.upper()})".center(80))
-        logger.log_info("=" * 80 + "\n")
-
-        # Log model details
-        total_params = sum(p.numel() for p in model.parameters())
-        model_size_gb = sum(p.numel() * p.element_size() for p in model.parameters()) / (1024 ** 3)
-
-        logger.log_info("📊 Model Information:")
-        logger.log_info("-" * 40)
-        logger.log_info(f"• Architecture: {model.config.model_type}")
-        logger.log_info(f"• Total Parameters: {total_params:,}")
-        logger.log_info(f"• Model Size: {model_size_gb:.2f} GB")
-        logger.log_info(f"• Export Format: {save_format.upper()}")
-        logger.log_info("")
+        if not verbose:
+            logger.log_info(f"Converting model to {save_format.upper()} format...")
+        else:
+            logger.log_info("\n" + "=" * 80)
+            logger.log_info(f"Starting Model Export Process ({save_format.upper()})".center(80))
+            logger.log_info("=" * 80 + "\n")
+
+            # Log model details
+            total_params = sum(p.numel() for p in model.parameters())
+            model_size_gb = sum(p.numel() * p.element_size() for p in model.parameters()) / (1024 ** 3)
+
+            logger.log_info("📊 Model Information:")
+            logger.log_info("-" * 40)
+            logger.log_info(f"• Architecture: {model.config.model_type}")
+            logger.log_info(f"• Total Parameters: {total_params:,}")
+            logger.log_info(f"• Model Size: {model_size_gb:.2f} GB")
+            logger.log_info(f"• Export Format: {save_format.upper()}")
+            logger.log_info("")

         # Get quantization info
-        if not quant_config:
-            if hasattr(model.config, 'quantization_config'):
-                config_dict = model.config.quantization_config
-                if isinstance(config_dict, BitsAndBytesConfig):
-                    # Handle BitsAndBytesConfig
-                    bits = 4 if config_dict.load_in_4bit else (8 if config_dict.load_in_8bit else 16)
-                    quant_config = {
-                        'bits': bits,
-                        'group_size': 128,  # Default group size
-                        'quant_type': f"Q{bits}_K_M" if bits <= 8 else "F16"
-                    }
+        if not quant_config and hasattr(model.config, 'quantization_config'):
+            config_dict = model.config.quantization_config
+            if isinstance(config_dict, BitsAndBytesConfig):
+                bits = 4 if config_dict.load_in_4bit else (8 if config_dict.load_in_8bit else 16)
+                quant_config = {
+                    'bits': bits,
+                    'group_size': 128,
+                    'quant_type': f"Q{bits}_K_M" if bits <= 8 else "F16"
+                }
+                if verbose:
                     logger.log_info("📊 Quantization Configuration:")
                     logger.log_info("-" * 40)
                     logger.log_info(f"• Bits: {bits}")
                     logger.log_info(f"• Quantization Type: {quant_config['quant_type']}")
                     if config_dict.load_in_4bit:
                         logger.log_info(f"• 4-bit Type: {config_dict.bnb_4bit_quant_type}")
                         logger.log_info(f"• Compute dtype: {config_dict.bnb_4bit_compute_dtype}")
-                else:
-                    quant_config = config_dict
+                    logger.log_info("")
             else:
-                logger.log_info("\nUsing default 4-bit quantization settings")
-                quant_config = {
-                    'bits': 4,
-                    'group_size': 128,
-                    'quant_type': "Q4_K_M"
-                }
+                quant_config = config_dict

         # Create output directory
         output_dir = os.path.dirname(output_path) or "."
@@ -371,35 +369,38 @@ def save_quantized_model(
             gguf_path = converter.convert_to_gguf(
                 model=model,
                 output_dir=output_dir,
-                bits=quant_config['bits'],
-                group_size=quant_config.get('group_size', 128),
+                bits=quant_config['bits'] if quant_config else 4,
+                group_size=quant_config.get('group_size', 128) if quant_config else 128,
                 save_tokenizer=save_tokenizer
             )

-            logger.log_info("\n✨ GGUF export completed successfully!")
+            if verbose:
+                file_size = os.path.getsize(gguf_path) / (1024 ** 3)
+                logger.log_info(f"\nGGUF model saved ({file_size:.2f} GB): {gguf_path}")
+            else:
+                logger.log_info("✓ GGUF conversion completed successfully!")

         else:  # safetensors format
-            logger.log_info("\n💾 Saving model in safetensors format:")
-            logger.log_info("-" * 40)
+            if verbose:
+                logger.log_info("\n💾 Saving model in safetensors format...")

             # Save the model
             model.save_pretrained(
                 output_dir,
                 safe_serialization=safe_serialization
             )
-            logger.log_info("• Model weights saved successfully")

             # Save tokenizer if requested
             if save_tokenizer and hasattr(model, 'tokenizer'):
-                logger.log_info("• Saving tokenizer...")
                 model.tokenizer.save_pretrained(output_dir)

-            logger.log_info("\n✨ Safetensors export completed successfully!")
-
-        logger.log_info("=" * 80)
+            if verbose:
+                logger.log_info("✓ Model saved successfully in safetensors format!")
+            else:
+                logger.log_info("✓ Model saved successfully!")

     except Exception as e:
-        logger.log_error(f"\n❌ Failed to save model: {str(e)}")
+        logger.log_error(f"Failed to save model: {str(e)}")
         raise
     finally:
         if torch.cuda.is_available():
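
For context, here is a minimal usage sketch of the new `verbose` flag. The import path, model id, and output paths are hypothetical; the sketch assumes a transformers model loaded with a `BitsAndBytesConfig`, from which `save_quantized_model` derives `bits` and a `Q{bits}_K_M` quant type when no explicit `quant_config` is passed.

```python
# Hypothetical usage sketch -- import path, model id, and output paths are illustrative.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from model_export import save_quantized_model  # hypothetical module name

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # illustrative model id
    quantization_config=bnb_config,
)

# Quiet by default: prints a single "Converting model to GGUF format..." line.
save_quantized_model(model, "exports/llama-7b-q4.gguf", save_format="gguf")

# verbose=True restores the full banner, model stats, and quantization details.
save_quantized_model(
    model,
    "exports/llama-7b/model",
    save_format="safetensors",
    verbose=True,
)
```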