@@ -1468,6 +1468,7 @@ class GGMLQuantizationType(IntEnum):
     Q4_0_4_8 = 32
     Q4_0_8_8 = 33
     I2_S = 36
+    MXFP4 = 39
     Q8_0_X4 = 97
     Q8_1_X4 = 98
     Q8_2_X4 = 99
@@ -1559,9 +1560,10 @@ class LlamaFileType(IntEnum):
     MOSTLY_IQ4_XS = 22 #except 1d tensors
     MOSTLY_IQ1_M = 23 #except 1d tensors
     MOSTLY_BF16 = 24 #except 1d tensors
-    MOSTLY_Q4_0_4_4 = 25 #except 1d tensors
-    MOSTLY_Q4_0_4_8 = 26 #except 1d tensors
-    MOSTLY_Q4_0_8_8 = 27 #except 1d tensors
+    MOSTLY_MXFP4 = 25 #except 1d tensors
+    MOSTLY_Q4_0_4_4 = 26 #except 1d tensors
+    MOSTLY_Q4_0_4_8 = 27 #except 1d tensors
+    MOSTLY_Q4_0_8_8 = 28 #except 1d tensors
     MOSTLY_Q6_0 = 127 #except 1d tensors
     MOSTLY_IQ1_BN = 128 #except 1d tensors
     MOSTLY_IQ2_BN = 129 #except 1d tensors
@@ -1682,6 +1684,7 @@ def get_type(val: Any) -> GGUFValueType:
     GGMLQuantizationType.F64: (1, 8),
     GGMLQuantizationType.IQ1_M: (256, 56),
     GGMLQuantizationType.BF16: (1, 2),
+    GGMLQuantizationType.MXFP4: (32, 17),
     GGMLQuantizationType.Q4_0_4_4: (32, 18),
     GGMLQuantizationType.Q4_0_4_8: (32, 18),
     GGMLQuantizationType.Q4_0_8_8: (32, 18),
0 commit comments