@@ -696,12 +696,12 @@ struct gguf_file_saver {
     // we need to calculate the delta in number of bytes written with a counter as a struct member.
 
     gguf_file file;
-    gguf_file_loader * fl;
+    gguf_context * ctx; // loaded gguf context (used to re-write the KV section (good enough for now))
     size_t info_offset;
     size_t tensor_offset = 0;
 
-    gguf_file_saver(const char * fname, gguf_file_loader * fl)
-        : file(fname, "wb"), fl(fl) {
+    gguf_file_saver(const char * fname, gguf_context * ctx)
+        : file(fname, "wb"), ctx(ctx) {
         fprintf(stderr, "llama.cpp: saving model to %s\n", fname);
         write_header();
         write_kv();
@@ -710,15 +710,15 @@ struct gguf_file_saver {
     void write_header() {
         file.write_i32(GGUF_MAGIC);
         file.write_i32(GGUF_VERSION);
-        file.write_i32(gguf_get_n_tensors(fl->gguf_ctx));
-        file.write_i32(gguf_get_n_kv(fl->gguf_ctx));
+        file.write_i32(gguf_get_n_tensors(ctx));
+        file.write_i32(gguf_get_n_kv(ctx));
     }
 
     void write_kv_arr_str(const std::string & key, enum gguf_type type, int i, int n_arr) {
         std::vector<std::string> data(n_arr);
 
         for (int j = 0; j < n_arr; ++j) {
-            std::string val = gguf_get_arr_str(fl->gguf_ctx, i, j);
+            std::string val = gguf_get_arr_str(ctx, i, j);
             data[j] = val;
         }
 
@@ -729,7 +729,7 @@ struct gguf_file_saver {
         std::vector<float> data(n_arr);
 
         for (int j = 0; j < n_arr; ++j) {
-            float val = gguf_get_arr_f32(fl->gguf_ctx, i, j);
+            float val = gguf_get_arr_f32(ctx, i, j);
             data[j] = val;
         }
 
@@ -738,28 +738,28 @@ struct gguf_file_saver {
 
     // re-write the key-value section from the loaded file
     void write_kv() {
-        const int32_t n_kv = gguf_get_n_kv(fl->gguf_ctx);
+        const int32_t n_kv = gguf_get_n_kv(ctx);
         for (int i = 0; i < n_kv; ++i) {
-            const char * key = gguf_get_key(fl->gguf_ctx, i);
+            const char * key = gguf_get_key(ctx, i);
             if (strcmp(key, "general.quantization_version") == 0) {
                 file.write_val<uint32_t>("general.quantization_version", GGUF_TYPE_UINT32, GGML_QNT_VERSION);
             } else {
-                const gguf_type vtype = gguf_get_kv_type(fl->gguf_ctx, i);
+                const gguf_type vtype = gguf_get_kv_type(ctx, i);
 
                 switch (vtype) {
-                    case GGUF_TYPE_BOOL:    file.write_val<bool>    (key, GGUF_TYPE_BOOL,    gguf_get_val_bool(fl->gguf_ctx, i)); break;
-                    case GGUF_TYPE_FLOAT32: file.write_val<float>   (key, GGUF_TYPE_FLOAT32, gguf_get_val_f32 (fl->gguf_ctx, i)); break;
-                    case GGUF_TYPE_INT16:   file.write_val<int16_t> (key, GGUF_TYPE_INT16,   gguf_get_val_i16 (fl->gguf_ctx, i)); break;
-                    case GGUF_TYPE_INT32:   file.write_val<int32_t> (key, GGUF_TYPE_INT32,   gguf_get_val_i32 (fl->gguf_ctx, i)); break;
-                    case GGUF_TYPE_INT8:    file.write_val<int8_t>  (key, GGUF_TYPE_INT8,    gguf_get_val_i8  (fl->gguf_ctx, i)); break;
-                    case GGUF_TYPE_STRING:  file.write_str          (key, GGUF_TYPE_STRING,  gguf_get_val_str (fl->gguf_ctx, i)); break;
-                    case GGUF_TYPE_UINT16:  file.write_val<uint16_t>(key, GGUF_TYPE_UINT16,  gguf_get_val_u16 (fl->gguf_ctx, i)); break;
-                    case GGUF_TYPE_UINT32:  file.write_val<uint32_t>(key, GGUF_TYPE_UINT32,  gguf_get_val_u32 (fl->gguf_ctx, i)); break;
-                    case GGUF_TYPE_UINT8:   file.write_val<uint8_t> (key, GGUF_TYPE_UINT8,   gguf_get_val_u8  (fl->gguf_ctx, i)); break;
+                    case GGUF_TYPE_BOOL:    file.write_val<bool>    (key, GGUF_TYPE_BOOL,    gguf_get_val_bool(ctx, i)); break;
+                    case GGUF_TYPE_FLOAT32: file.write_val<float>   (key, GGUF_TYPE_FLOAT32, gguf_get_val_f32 (ctx, i)); break;
+                    case GGUF_TYPE_INT16:   file.write_val<int16_t> (key, GGUF_TYPE_INT16,   gguf_get_val_i16 (ctx, i)); break;
+                    case GGUF_TYPE_INT32:   file.write_val<int32_t> (key, GGUF_TYPE_INT32,   gguf_get_val_i32 (ctx, i)); break;
+                    case GGUF_TYPE_INT8:    file.write_val<int8_t>  (key, GGUF_TYPE_INT8,    gguf_get_val_i8  (ctx, i)); break;
+                    case GGUF_TYPE_STRING:  file.write_str          (key, GGUF_TYPE_STRING,  gguf_get_val_str (ctx, i)); break;
+                    case GGUF_TYPE_UINT16:  file.write_val<uint16_t>(key, GGUF_TYPE_UINT16,  gguf_get_val_u16 (ctx, i)); break;
+                    case GGUF_TYPE_UINT32:  file.write_val<uint32_t>(key, GGUF_TYPE_UINT32,  gguf_get_val_u32 (ctx, i)); break;
+                    case GGUF_TYPE_UINT8:   file.write_val<uint8_t> (key, GGUF_TYPE_UINT8,   gguf_get_val_u8  (ctx, i)); break;
                     case GGUF_TYPE_ARRAY:
                         {
-                            const gguf_type arr_type = gguf_get_arr_type(fl->gguf_ctx, i);
-                            const int       n_arr    = gguf_get_arr_n   (fl->gguf_ctx, i);
+                            const gguf_type arr_type = gguf_get_arr_type(ctx, i);
+                            const int       n_arr    = gguf_get_arr_n   (ctx, i);
                             if (arr_type == GGUF_TYPE_FLOAT32) {
                                 write_kv_arr_f32(key, arr_type, i, n_arr);
                             } else if (arr_type == GGUF_TYPE_STRING) {
@@ -776,9 +776,9 @@ struct gguf_file_saver {
 
         info_offset = file.tell();
 
-        GGML_ASSERT(gguf_get_data_offset(fl->gguf_ctx) >= info_offset);
+        GGML_ASSERT(gguf_get_data_offset(ctx) >= info_offset);
 
-        size_t count = gguf_get_data_offset(fl->gguf_ctx) - info_offset;
+        size_t count = gguf_get_data_offset(ctx) - info_offset;
         file.write_zeros(count);
         file.seek(info_offset, SEEK_SET);
         GGML_ASSERT(info_offset == file.tell());
@@ -3219,7 +3219,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     }
 
     std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp, /*use_mmap*/ false));
-    gguf_file_saver file_saver(fname_out.c_str(), model_loader->file_loader.get());
+    gguf_file_saver file_saver(fname_out.c_str(), model_loader->file_loader->gguf_ctx);
 
 #ifdef GGML_USE_K_QUANTS
     int n_attention_wv = 0;
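
With this change the saver depends only on the public gguf_context accessors rather than on gguf_file_loader internals, so any code holding a gguf_context can walk the KV section the same way write_kv() does. A minimal stand-alone sketch of that iteration pattern (illustrative only, not part of this commit; it assumes the gguf API declared in ggml.h at the time, where gguf_get_n_kv() returns an int):

```cpp
// Illustrative sketch, not part of the commit above: walking the KV section of
// a GGUF file through the same gguf_context accessors that write_kv() uses.
#include <cstdio>

#include "ggml.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
        return 1;
    }

    // no_alloc = true: parse metadata only, do not allocate tensor data
    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ NULL };
    struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load %s\n", argv[1]);
        return 1;
    }

    // same loop shape as gguf_file_saver::write_kv(): one pass over all KV pairs
    const int n_kv = gguf_get_n_kv(ctx);
    for (int i = 0; i < n_kv; ++i) {
        printf("%-40s type=%d\n", gguf_get_key(ctx, i), (int) gguf_get_kv_type(ctx, i));
    }

    gguf_free(ctx);
    return 0;
}
```

gguf_file_saver does the inverse of this loop: for each key it dispatches on the reported type and re-emits the value with the matching file.write_val overload, as in the hunk above.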