@@ -16,44 +16,44 @@ struct quant_option {
1616};
1717
1818static const std::vector<struct quant_option > QUANT_OPTIONS = {
19- { " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0, " 4.34G, +0.4685 ppl @ Llama-3-8B" , },
20- { " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1, " 4.78G, +0.4511 ppl @ Llama-3-8B" , },
21- { " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0, " 5.21G, +0.1316 ppl @ Llama-3-8B" , },
22- { " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1, " 5.65G, +0.1062 ppl @ Llama-3-8B" , },
23- { " IQ2_XXS" ,LLAMA_FTYPE_MOSTLY_IQ2_XXS," 2.06 bpw quantization" , },
24- { " IQ2_XS" , LLAMA_FTYPE_MOSTLY_IQ2_XS, " 2.31 bpw quantization" , },
25- { " IQ2_S" , LLAMA_FTYPE_MOSTLY_IQ2_S, " 2.5 bpw quantization" , },
26- { " IQ2_M" , LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization" , },
27- { " IQ1_S" , LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization" , },
28- { " IQ1_M" , LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization" , },
29- { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K, " 2.96G, +3.5199 ppl @ Llama-3-8B" , },
30- { " Q2_K_S" , LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B" , },
31- { " IQ3_XXS" ,LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization" , },
32- { " IQ3_S" , LLAMA_FTYPE_MOSTLY_IQ3_S, " 3.44 bpw quantization" , },
33- { " IQ3_M" , LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.66 bpw quantization mix" , },
34- { " Q3_K" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " alias for Q3_K_M" },
35- { " IQ3_XS" , LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization" , },
36- { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G, +1.6321 ppl @ Llama-3-8B" , },
37- { " Q3_K_M" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.74G, +0.6569 ppl @ Llama-3-8B" , },
38- { " Q3_K_L" , LLAMA_FTYPE_MOSTLY_Q3_K_L, " 4.03G, +0.5562 ppl @ Llama-3-8B" , },
39- { " IQ4_NL" , LLAMA_FTYPE_MOSTLY_IQ4_NL, " 4.50 bpw non-linear quantization" , },
40- { " IQ4_XS" , LLAMA_FTYPE_MOSTLY_IQ4_XS, " 4.25 bpw non-linear quantization" , },
41- { " Q4_K" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " alias for Q4_K_M" , },
42- { " Q4_K_S" , LLAMA_FTYPE_MOSTLY_Q4_K_S, " 4.37G, +0.2689 ppl @ Llama-3-8B" , },
43- { " Q4_K_M" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " 4.58G, +0.1754 ppl @ Llama-3-8B" , },
44- { " Q5_K" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " alias for Q5_K_M" , },
45- { " Q5_K_S" , LLAMA_FTYPE_MOSTLY_Q5_K_S, " 5.21G, +0.1049 ppl @ Llama-3-8B" , },
46- { " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " 5.33G, +0.0569 ppl @ Llama-3-8B" , },
47- { " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K, " 6.14G, +0.0217 ppl @ Llama-3-8B" , },
48- { " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0, " 7.96G, +0.0026 ppl @ Llama-3-8B" , },
49- { " Q4_0_4_4" , LLAMA_FTYPE_MOSTLY_Q4_0_4_4, " 4.34G, +0.4685 ppl @ Llama-3-8B" , },
50- { " Q4_0_4_8" , LLAMA_FTYPE_MOSTLY_Q4_0_4_8, " 4.34G, +0.4685 ppl @ Llama-3-8B" , },
51- { " Q4_0_8_8" , LLAMA_FTYPE_MOSTLY_Q4_0_8_8, " 4.34G, +0.4685 ppl @ Llama-3-8B" , },
52- { " F16" , LLAMA_FTYPE_MOSTLY_F16, " 14.00G, +0.0020 ppl @ Mistral-7B" , },
53- { " BF16" , LLAMA_FTYPE_MOSTLY_BF16, " 14.00G, -0.0050 ppl @ Mistral-7B" , },
54- { " F32" , LLAMA_FTYPE_ALL_F32, " 26.00G @ 7B" , },
19+ { " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0, " 4.34G, +0.4685 ppl @ Llama-3-8B" , },
20+ { " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1, " 4.78G, +0.4511 ppl @ Llama-3-8B" , },
21+ { " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0, " 5.21G, +0.1316 ppl @ Llama-3-8B" , },
22+ { " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1, " 5.65G, +0.1062 ppl @ Llama-3-8B" , },
23+ { " IQ2_XXS" , LLAMA_FTYPE_MOSTLY_IQ2_XXS, " 2.06 bpw quantization" , },
24+ { " IQ2_XS" , LLAMA_FTYPE_MOSTLY_IQ2_XS, " 2.31 bpw quantization" , },
25+ { " IQ2_S" , LLAMA_FTYPE_MOSTLY_IQ2_S, " 2.5 bpw quantization" , },
26+ { " IQ2_M" , LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization" , },
27+ { " IQ1_S" , LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization" , },
28+ { " IQ1_M" , LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization" , },
29+ { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K, " 2.96G, +3.5199 ppl @ Llama-3-8B" , },
30+ { " Q2_K_S" , LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B" , },
31+ { " IQ3_XXS" , LLAMA_FTYPE_MOSTLY_IQ3_XXS, " 3.06 bpw quantization" , },
32+ { " IQ3_S" , LLAMA_FTYPE_MOSTLY_IQ3_S, " 3.44 bpw quantization" , },
33+ { " IQ3_M" , LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.66 bpw quantization mix" , },
34+ { " Q3_K" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " alias for Q3_K_M" },
35+ { " IQ3_XS" , LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization" , },
36+ { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G, +1.6321 ppl @ Llama-3-8B" , },
37+ { " Q3_K_M" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.74G, +0.6569 ppl @ Llama-3-8B" , },
38+ { " Q3_K_L" , LLAMA_FTYPE_MOSTLY_Q3_K_L, " 4.03G, +0.5562 ppl @ Llama-3-8B" , },
39+ { " IQ4_NL" , LLAMA_FTYPE_MOSTLY_IQ4_NL, " 4.50 bpw non-linear quantization" , },
40+ { " IQ4_XS" , LLAMA_FTYPE_MOSTLY_IQ4_XS, " 4.25 bpw non-linear quantization" , },
41+ { " Q4_K" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " alias for Q4_K_M" , },
42+ { " Q4_K_S" , LLAMA_FTYPE_MOSTLY_Q4_K_S, " 4.37G, +0.2689 ppl @ Llama-3-8B" , },
43+ { " Q4_K_M" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " 4.58G, +0.1754 ppl @ Llama-3-8B" , },
44+ { " Q5_K" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " alias for Q5_K_M" , },
45+ { " Q5_K_S" , LLAMA_FTYPE_MOSTLY_Q5_K_S, " 5.21G, +0.1049 ppl @ Llama-3-8B" , },
46+ { " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " 5.33G, +0.0569 ppl @ Llama-3-8B" , },
47+ { " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K, " 6.14G, +0.0217 ppl @ Llama-3-8B" , },
48+ { " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0, " 7.96G, +0.0026 ppl @ Llama-3-8B" , },
49+ { " Q4_0_4_4" , LLAMA_FTYPE_MOSTLY_Q4_0_4_4, " 4.34G, +0.4685 ppl @ Llama-3-8B" , },
50+ { " Q4_0_4_8" , LLAMA_FTYPE_MOSTLY_Q4_0_4_8, " 4.34G, +0.4685 ppl @ Llama-3-8B" , },
51+ { " Q4_0_8_8" , LLAMA_FTYPE_MOSTLY_Q4_0_8_8, " 4.34G, +0.4685 ppl @ Llama-3-8B" , },
52+ { " F16" , LLAMA_FTYPE_MOSTLY_F16, " 14.00G, +0.0020 ppl @ Mistral-7B" , },
53+ { " BF16" , LLAMA_FTYPE_MOSTLY_BF16, " 14.00G, -0.0050 ppl @ Mistral-7B" , },
54+ { " F32" , LLAMA_FTYPE_ALL_F32, " 26.00G @ 7B" , },
5555 // Note: Ensure COPY comes after F32 to avoid ftype 0 from matching.
56- { " COPY" , LLAMA_FTYPE_ALL_F32, " only copy tensors, no quantizing" , },
56+ { " COPY" , LLAMA_FTYPE_ALL_F32, " only copy tensors, no quantizing" , },
5757};
5858
5959static const char * const LLM_KV_QUANTIZE_IMATRIX_FILE = " quantize.imatrix.file" ;
0 commit comments