11#include " ggml.h"
2- #include " gguf-util.h"
32#include " gguf-llama.h"
43
54#include < cstdio>
@@ -21,133 +20,22 @@ static std::string to_string(const T & val) {
2120 return ss.str ();
2221}
2322
24- void gguf_ex_write_str (std::ofstream & fout, const std::string & val) {
25- const int32_t n = val.size ();
26- fout.write ((const char *) &n, sizeof (n));
27- fout.write (val.c_str (), n);
28- }
29-
30- void gguf_ex_write_i32 (std::ofstream & fout, int32_t val) {
31- fout.write ((const char *) &val, sizeof (val));
32- }
33-
34- void gguf_ex_write_u64 (std::ofstream & fout, size_t val) {
35- fout.write ((const char *) &val, sizeof (val));
36- }
37-
38- template <typename T>
39- void gguf_ex_write_val (std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
40- gguf_ex_write_str (fout, key);
41- fout.write ((const char *) &type, sizeof (type));
42- fout.write ((const char *) &val, sizeof (val));
43-
44- fprintf (stdout, " %s: write param: %s = %s\n " , __func__, key.c_str (), to_string (val).c_str ());
45- }
46-
47- template <>
48- void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
49- gguf_ex_write_str (fout, key);
50- fout.write ((const char *) &type, sizeof (type));
51-
52- const int32_t n = val.size ();
53- fout.write ((const char *) &n, sizeof (n));
54- fout.write (val.c_str (), n);
55-
56- fprintf (stdout, " %s: write param: %s = %s\n " , __func__, key.c_str (), val.c_str ());
57- }
58-
59- template <typename T>
60- void gguf_ex_write_arr (std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
61- gguf_ex_write_str (fout, key);
62- {
63- const enum gguf_type tarr = GGUF_TYPE_ARRAY;
64- fout.write ((const char *) &tarr, sizeof (tarr));
65- }
66-
67- const int32_t n = val.size ();
68- fout.write ((const char *) &type, sizeof (type));
69- fout.write ((const char *) &n, sizeof (n));
70- fout.write ((const char *) val.data (), n * sizeof (T));
71-
72- fprintf (stdout, " %s: write param: %s = [" , __func__, key.c_str ());
73- for (int i = 0 ; i < n; ++i) {
74- fprintf (stdout, " %s" , to_string (val[i]).c_str ());
75- if (i < n - 1 ) {
76- fprintf (stdout, " , " );
77- }
78- }
79- fprintf (stdout, " ]\n " );
80- }
81-
82- template <>
83- void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
84- gguf_ex_write_str (fout, key);
85- {
86- const enum gguf_type tarr = GGUF_TYPE_ARRAY;
87- fout.write ((const char *) &tarr, sizeof (tarr));
88- }
89-
90- const int32_t n = val.size ();
91- fout.write ((const char *) &type, sizeof (type));
92- fout.write ((const char *) &n, sizeof (n));
93- for (int i = 0 ; i < n; ++i) {
94- const int32_t nstr = val[i].size ();
95- fout.write ((const char *) &nstr, sizeof (nstr));
96- fout.write (val[i].c_str (), nstr);
97- }
98-
99- fprintf (stdout, " %s: write param: %s = [" , __func__, key.c_str ());
100- for (int i = 0 ; i < n; ++i) {
101- fprintf (stdout, " %s" , val[i].c_str ());
102- if (i < n - 1 ) {
103- fprintf (stdout, " , " );
104- }
105- }
106- fprintf (stdout, " ]\n " );
107- }
108-
10923bool gguf_ex_write (const std::string & fname) {
110- std::ofstream fout (fname.c_str (), std::ios::binary);
111-
112- {
113- const int32_t magic = GGUF_MAGIC;
114- fout.write ((const char *) &magic, sizeof (magic));
115- }
116-
117- {
118- const int32_t version = GGUF_VERSION;
119- fout.write ((const char *) &version, sizeof (version));
120- }
121-
122- // NOTE: these have to match the output below!
123- const int n_tensors = 10 ;
124- const int n_kv = 12 ;
125-
126- fout.write ((const char *) &n_tensors, sizeof (n_tensors));
127- fout.write ((const char *) &n_kv, sizeof (n_kv));
128-
129- fprintf (stdout, " %s: write header\n " , __func__);
130-
131- // kv data
132- {
133- gguf_ex_write_val< uint8_t >(fout, " some.parameter.uint8" , GGUF_TYPE_UINT8, 0x12 );
134- gguf_ex_write_val< int8_t >(fout, " some.parameter.int8" , GGUF_TYPE_INT8, -0x13 );
135- gguf_ex_write_val<uint16_t >(fout, " some.parameter.uint16" , GGUF_TYPE_UINT16, 0x1234 );
136- gguf_ex_write_val< int16_t >(fout, " some.parameter.int16" , GGUF_TYPE_INT16, -0x1235 );
137- gguf_ex_write_val<uint32_t >(fout, " some.parameter.uint32" , GGUF_TYPE_UINT32, 0x12345678 );
138- gguf_ex_write_val< int32_t >(fout, " some.parameter.int32" , GGUF_TYPE_INT32, -0x12345679 );
139-
140- gguf_ex_write_val<float > (fout, " some.parameter.float32" , GGUF_TYPE_FLOAT32, 0 .123456789f );
141- gguf_ex_write_val<bool > (fout, " some.parameter.bool" , GGUF_TYPE_BOOL, true );
142-
143- gguf_ex_write_val<std::string>(fout, " some.parameter.string" , GGUF_TYPE_STRING, " hello world" );
144-
145- gguf_ex_write_arr<int16_t > (fout, " some.parameter.arr.i16" , GGUF_TYPE_INT16, { 1 , 2 , 3 , 4 , });
146- gguf_ex_write_arr<float > (fout, " some.parameter.arr.f32" , GGUF_TYPE_FLOAT32, { 3 .145f , 2 .718f , 1 .414f , });
147- gguf_ex_write_arr<std::string>(fout, " some.parameter.arr.str" , GGUF_TYPE_STRING, { " hello" , " world" , " !" });
148- }
149-
150- uint64_t offset_tensor = 0 ;
24+ struct gguf_context * ctx = gguf_init_empty ();
25+
26+ gguf_set_val_u8 (ctx, " some.parameter.uint8" , 0x12 );
27+ gguf_set_val_i8 (ctx, " some.parameter.int8" , -0x13 );
28+ gguf_set_val_u16 (ctx, " some.parameter.uint16" , 0x1234 );
29+ gguf_set_val_i16 (ctx, " some.parameter.int16" , -0x1235 );
30+ gguf_set_val_u32 (ctx, " some.parameter.uint32" , 0x12345678 );
31+ gguf_set_val_i32 (ctx, " some.parameter.int32" , -0x12345679 );
32+ gguf_set_val_f32 (ctx, " some.parameter.float32" , 0 .123456789f );
33+ gguf_set_val_bool (ctx, " some.parameter.bool" , true );
34+ gguf_set_val_str (ctx, " some.parameter.string" , " hello world" );
35+
36+ gguf_set_arr_data (ctx, " some.parameter.arr.i16" , GGUF_TYPE_INT16, std::vector<int16_t >{ 1 , 2 , 3 , 4 , }.data (), 4 );
37+ gguf_set_arr_data (ctx, " some.parameter.arr.f32" , GGUF_TYPE_FLOAT32, std::vector<float >{ 3 .145f , 2 .718f , 1 .414f , }.data (), 3 );
38+ gguf_set_arr_str (ctx, " some.parameter.arr.str" , std::vector<const char *>{ " hello" , " world" , " !" }.data (), 3 );
15139
15240 struct ggml_init_params params = {
15341 /* .mem_size =*/ 128ull *1024ull *1024ull ,
@@ -157,6 +45,8 @@ bool gguf_ex_write(const std::string & fname) {
15745
15846 struct ggml_context * ctx_data = ggml_init (params);
15947
48+ const int n_tensors = 10 ;
49+
16050 // tensor infos
16151 for (int i = 0 ; i < n_tensors; ++i) {
16252 const std::string name = " tensor_" + to_string (i);
@@ -178,58 +68,15 @@ bool gguf_ex_write(const std::string & fname) {
17868 }
17969 }
18070
181- fprintf (stdout, " %s: tensor: %s, %d dims, ne = [" , __func__, name.c_str (), n_dims);
182- for (int j = 0 ; j < 4 ; ++j) {
183- fprintf (stdout, " %s%3d" , j == 0 ? " " : " , " , (int ) cur->ne [j]);
184- }
185- fprintf (stdout, " ], offset_tensor = %6" PRIu64 " \n " , offset_tensor);
186-
187- gguf_ex_write_str (fout, name);
188- gguf_ex_write_i32 (fout, n_dims);
189- for (int j = 0 ; j < n_dims; ++j) {
190- gguf_ex_write_i32 (fout, cur->ne [j]);
191- }
192- gguf_ex_write_i32 (fout, cur->type );
193- gguf_ex_write_u64 (fout, offset_tensor);
194-
195- offset_tensor += GGML_PAD (ggml_nbytes (cur), GGUF_DEFAULT_ALIGNMENT);
196- }
197-
198- const uint64_t offset_data = GGML_PAD ((uint64_t ) fout.tellp (), GGUF_DEFAULT_ALIGNMENT);
199-
200- fprintf (stdout, " %s: data offset = %" PRIu64 " \n " , __func__, offset_data);
201-
202- {
203- const size_t pad = offset_data - fout.tellp ();
204-
205- for (size_t j = 0 ; j < pad; ++j) {
206- fout.put (0 );
207- }
208- }
209-
210- for (int i = 0 ; i < n_tensors; ++i) {
211- fprintf (stdout, " %s: writing tensor %d data\n " , __func__, i);
212-
213- const std::string name = " tensor_" + to_string (i);
214-
215- struct ggml_tensor * cur = ggml_get_tensor (ctx_data, name.c_str ());
216-
217- fout.write ((const char *) cur->data , ggml_nbytes (cur));
218-
219- {
220- const size_t pad = GGML_PAD (ggml_nbytes (cur), GGUF_DEFAULT_ALIGNMENT) - ggml_nbytes (cur);
221-
222- for (size_t j = 0 ; j < pad; ++j) {
223- fout.put (0 );
224- }
225- }
71+ gguf_add_tensor (ctx, cur);
22672 }
22773
228- fout. close ( );
74+ gguf_write_to_file (ctx, fname. c_str (), false );
22975
23076 fprintf (stdout, " %s: wrote file '%s;\n " , __func__, fname.c_str ());
23177
23278 ggml_free (ctx_data);
79+ gguf_free (ctx);
23380
23481 return true ;
23582}
@@ -345,8 +192,16 @@ bool gguf_ex_read_1(const std::string & fname) {
345192
346193 struct ggml_tensor * cur = ggml_get_tensor (ctx_data, name);
347194
348- fprintf (stdout, " %s: tensor[%d]: n_dims = %d, name = %s, data = %p\n " ,
349- __func__, i, cur->n_dims , cur->name , cur->data );
195+ fprintf (stdout, " %s: tensor[%d]: n_dims = %d, name = %s, data = %p\n " , __func__, i, cur->n_dims , cur->name , cur->data );
196+
197+ // print first 10 elements
198+ const float * data = (const float *) cur->data ;
199+
200+ printf (" %s data[:10] : " , name);
201+ for (int j = 0 ; j < MIN (10 , ggml_nelements (cur)); ++j) {
202+ printf (" %f " , data[j]);
203+ }
204+ printf (" \n\n " );
350205
351206 // check data
352207 {
@@ -369,48 +224,6 @@ bool gguf_ex_read_1(const std::string & fname) {
369224 return true ;
370225}
371226
372- // read just the tensor info and mmap the data in user code
373- bool gguf_ex_read_2 (const std::string & fname) {
374- struct ggml_context * ctx_data = NULL ;
375-
376- struct gguf_init_params params = {
377- /* .no_alloc = */ true ,
378- /* .ctx = */ &ctx_data,
379- };
380-
381- struct gguf_context * ctx = gguf_init_from_file (fname.c_str (), params);
382-
383- struct gguf_file file (fname.c_str(), "rb");
384- gguf_mmap data_mmap (&file, 0 , false );
385-
386- const int n_tensors = gguf_get_n_tensors (ctx);
387-
388- for (int i = 0 ; i < n_tensors; ++i) {
389- const char * name = gguf_get_tensor_name (ctx, i);
390- const size_t offset = gguf_get_data_offset (ctx) + gguf_get_tensor_offset (ctx, i);
391-
392- struct ggml_tensor * cur = ggml_get_tensor (ctx_data, name);
393-
394- cur->data = static_cast <char *>(data_mmap.addr ) + offset;
395-
396- // print first 10 elements
397- const float * data = (const float *) cur->data ;
398-
399- printf (" %s data[:10] : " , name);
400- for (int j = 0 ; j < MIN (10 , ggml_nelements (cur)); ++j) {
401- printf (" %f " , data[j]);
402- }
403- printf (" \n\n " );
404- }
405-
406- fprintf (stdout, " %s: ctx_data size: %zu\n " , __func__, ggml_get_mem_size (ctx_data));
407-
408- ggml_free (ctx_data);
409- gguf_free (ctx);
410-
411- return true ;
412- }
413-
414227int main (int argc, char ** argv) {
415228 if (argc < 3 ) {
416229 fprintf (stdout, " usage: %s data.gguf r|w\n " , argv[0 ]);
@@ -427,7 +240,6 @@ int main(int argc, char ** argv) {
427240 } else if (mode == " r" ) {
428241 GGML_ASSERT (gguf_ex_read_0 (fname) && " failed to read gguf file" );
429242 GGML_ASSERT (gguf_ex_read_1 (fname) && " failed to read gguf file" );
430- GGML_ASSERT (gguf_ex_read_2 (fname) && " failed to read gguf file" );
431243 } else if (mode == " q" ) {
432244 llama_model_quantize_params params = llama_model_quantize_default_params ();
433245 llama_model_quantize (fname.c_str (), " quant.gguf" , ¶ms);
0 commit comments