@@ -477,6 +477,7 @@ struct llama_file_loader {
477477 case GGML_TYPE_F32:
478478 case GGML_TYPE_F16:
479479 case GGML_TYPE_Q4_0:
480+ case GGML_TYPE_Q4_0C:
480481 case GGML_TYPE_Q4_1:
481482 case GGML_TYPE_Q4_2:
482483 break ;
@@ -550,6 +551,7 @@ struct llama_file_saver {
550551 case GGML_TYPE_F32:
551552 case GGML_TYPE_F16:
552553 case GGML_TYPE_Q4_0:
554+ case GGML_TYPE_Q4_0C:
553555 case GGML_TYPE_Q4_1:
554556 case GGML_TYPE_Q4_2:
555557 break ;
@@ -837,6 +839,7 @@ static const char *llama_ftype_name(enum llama_ftype ftype) {
837839 case LLAMA_FTYPE_ALL_F32: return " all F32" ;
838840 case LLAMA_FTYPE_MOSTLY_F16: return " mostly F16" ;
839841 case LLAMA_FTYPE_MOSTLY_Q4_0: return " mostly Q4_0" ;
842+ case LLAMA_FTYPE_MOSTLY_Q4_0C: return " mostly Q4_0C" ;
840843 case LLAMA_FTYPE_MOSTLY_Q4_1: return " mostly Q4_1" ;
841844 case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:
842845 return " mostly Q4_1, some F16" ;
@@ -1573,6 +1576,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
15731576 ggml_type quantized_type;
15741577 switch (ftype) {
15751578 case LLAMA_FTYPE_MOSTLY_Q4_0: quantized_type = GGML_TYPE_Q4_0; break ;
1579+ case LLAMA_FTYPE_MOSTLY_Q4_0C: quantized_type = GGML_TYPE_Q4_0C; break ;
15761580 case LLAMA_FTYPE_MOSTLY_Q4_1: quantized_type = GGML_TYPE_Q4_1; break ;
15771581 case LLAMA_FTYPE_MOSTLY_Q4_2: quantized_type = GGML_TYPE_Q4_2; break ;
15781582 default : throw format (" invalid output file type %d\n " , ftype);
@@ -1644,6 +1648,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
16441648 {
16451649 new_size = ggml_quantize_q4_0 (f32_data, new_data, nelements, (int ) tensor.ne .at (0 ), hist_cur.data ());
16461650 } break ;
1651+ case GGML_TYPE_Q4_0C:
1652+ {
1653+ new_size = ggml_quantize_q4_0c (f32_data, new_data, nelements, (int ) tensor.ne .at (0 ), hist_cur.data ());
1654+ } break ;
16471655 case GGML_TYPE_Q4_1:
16481656 {
16491657 new_size = ggml_quantize_q4_1 (f32_data, new_data, nelements, (int ) tensor.ne .at (0 ), hist_cur.data ());
0 commit comments