 from transformers import AutoTokenizer
 
 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
+
+
 def bytes_to_unicode():
     """
     Returns list of utf-8 byte and a corresponding list of unicode strings.
@@ -34,6 +36,7 @@ def bytes_to_unicode():
     cs = [chr(n) for n in cs]
     return dict(zip(bs, cs))
 
+
 def count_model_parts(dir_model: str) -> int:
     num_parts = 0
     for filename in os.listdir(dir_model):
@@ -44,6 +47,7 @@ def count_model_parts(dir_model: str) -> int:
     print("gguf: found " + str(num_parts) + " model parts")
     return num_parts
 
+
 if len(sys.argv) < 3:
     print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
     print("  ftype == 0 -> float32")
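For reference, the expected invocation takes a Hugging Face model directory and an ftype flag; the model path below is illustrative (any GPTNeoXForCausalLM checkpoint, e.g. a Pythia model, should fit):

    # convert to float16 (ftype == 1)
    python convert-h5-to-ggml.py models/pythia-410m 1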
@@ -58,7 +62,7 @@ def count_model_parts(dir_model: str) -> int:
 # possible tensor data types
 # ftype == 0 -> float32
 # ftype == 1 -> float16
-#
+
 # map from ftype to string
 ftype_str = ["f32", "f16"]
 
@@ -67,6 +71,7 @@ def count_model_parts(dir_model: str) -> int:
     ftype = int(sys.argv[2])
     if ftype < 0 or ftype > 1:
         print("Invalid ftype: " + str(ftype))
+
         sys.exit(1)
 
 fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"
@@ -77,30 +82,30 @@ def count_model_parts(dir_model: str) -> int:
     hparams = json.load(f)
 
 if hparams["architectures"][0] != "GPTNeoXForCausalLM":
-    print("Model architecture not supported: " + hparams["architectures"][0] )
+    print("Model architecture not supported: " + hparams["architectures"][0])
+
     sys.exit()
 
 # get number of model parts
 num_parts = count_model_parts(dir_model)
 
-gguf_writer = gguf.GGUFWriter.open(fname_out)
+llm_arch = "gptneox"
+gguf_writer = gguf.GGUFWriter(fname_out, arch=llm_arch)
 
 print("gguf: get model metadata")
 
-llm_arch = "gptneox"
 block_count = hparams["num_hidden_layers"]
 
-gguf_writer.add_architecture(llm_arch)
+gguf_writer.add_architecture()
 gguf_writer.add_name(last_dir)
-gguf_writer.add_file_type( "All tensors F32" if ftype == 0 else "Most tensors F16, some F32" )
-gguf_writer.add_context_length(llm_arch, hparams["max_position_embeddings"])
-gguf_writer.add_embedding_length(llm_arch, hparams["hidden_size"])
-gguf_writer.add_block_count(llm_arch, block_count)
-gguf_writer.add_feed_forward_length(llm_arch, hparams["intermediate_size"])
-gguf_writer.add_rope_dimension_count(llm_arch, int( hparams["rotary_pct"]*(hparams["hidden_size"]//hparams["num_attention_heads"])) )
-gguf_writer.add_head_count(llm_arch, hparams["num_attention_heads"])
-gguf_writer.add_parallel_residual(llm_arch, hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
-gguf_writer.add_layer_norm_eps(llm_arch, hparams["layer_norm_eps"])
+gguf_writer.add_context_length(hparams["max_position_embeddings"])
+gguf_writer.add_embedding_length(hparams["hidden_size"])
+gguf_writer.add_block_count(block_count)
+gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+gguf_writer.add_rope_dimension_count(int(hparams["rotary_pct"]*(hparams["hidden_size"]//hparams["num_attention_heads"])))
+gguf_writer.add_head_count(hparams["num_attention_heads"])
+gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
+gguf_writer.add_layer_norm_eps(hparams["layer_norm_eps"])
 
 # TOKENIZATION
 
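Two things worth noting in this hunk. First, the architecture is now bound once in the GGUFWriter constructor, so the per-key setters drop their llm_arch argument (and add_file_type, which stored a free-form string, is gone). Second, the rope dimension count is derived rather than read from the config: rotary_pct is the fraction of each attention head's dimensions covered by rotary position embeddings. A worked example with illustrative GPT-NeoX-style values:

    hparams = {"hidden_size": 4096, "num_attention_heads": 32, "rotary_pct": 0.25}  # illustrative

    head_dim  = hparams["hidden_size"] // hparams["num_attention_heads"]  # 4096 // 32 = 128
    rope_dims = int(hparams["rotary_pct"] * head_dim)                     # int(0.25 * 128) = 32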
@@ -124,14 +129,14 @@ def count_model_parts(dir_model: str) -> int:
 
     print("gguf: get gpt2 tokenizer vocab")
 
-    vocab_size = len( tokenizer_json["model"]["vocab"] )
+    vocab_size = len(tokenizer_json["model"]["vocab"])
 
     # ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
     tokenizer = AutoTokenizer.from_pretrained(dir_model)
 
     reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
     byte_encoder = bytes_to_unicode()
-    byte_decoder = {v:k for k, v in byte_encoder.items()}
+    byte_decoder = {v: k for k, v in byte_encoder.items()}
 
     for i in range(vocab_size):
         if i in reverse_vocab:
@@ -146,8 +151,9 @@ def count_model_parts(dir_model: str) -> int:
                         text.extend(c.encode('utf-8'))
         else:
             print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
-            padding_token = f"[PAD{i}]".encode("utf8")
-            text = bytearray(padding_token)
+            pad_token = f"[PAD{i}]".encode("utf8")
+            text = bytearray(pad_token)
+
        tokens.append(text)
 
     gguf_writer.add_token_list(tokens)
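As background on the loop above: byte_decoder inverts the GPT-2 bytes_to_unicode() map, turning each printable character of a vocab entry back into its raw byte. A quick round-trip sketch (the token string is a made-up example):

    byte_encoder = bytes_to_unicode()                        # raw byte value -> printable unicode char
    byte_decoder = {v: k for k, v in byte_encoder.items()}   # printable char -> raw byte value

    token = "Ġhello"                                         # hypothetical vocab entry; "Ġ" encodes a leading space
    raw = bytearray(byte_decoder[c] for c in token)
    assert raw == b" hello"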
@@ -201,7 +207,7 @@ def count_model_parts(dir_model: str) -> int:
     )
 
 for part_name in part_names:
-    print("gguf: loading model part '" + part_name + "'" )
+    print("gguf: loading model part '" + part_name + "'")
     model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
 
     for name in model_part.keys():
@@ -223,11 +229,12 @@ def count_model_parts(dir_model: str) -> int:
         elif name.endswith(".bias") and name[:-5] in tensor_map:
             name = tensor_map[name[:-5]] + ".bias"
         else:
-            print( "Can not map tensor '" + name + "'" )
+            print("Can not map tensor '" + name + "'")
             sys.exit()
 
         n_dims = len(data.shape)
         data_dtype = data.dtype
+        old_dtype = data_dtype
 
         # if f32 desired, convert any float16 to float32
         if ftype == 0 and data.dtype == np.float16:
@@ -241,77 +248,21 @@ def count_model_parts(dir_model: str) -> int:
         if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
             data_dtype = np.float16
 
-        data_nbytes = data.size * 2 if data_dtype == np.float16 else data.size * 4
+        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data_dtype))
+
+        data = data.astype(data_dtype)
 
-        gguf_writer.add_tensor_info(name, data.shape, data_dtype, data_nbytes)
+        gguf_writer.add_tensor(name, data)
 
 
 print("gguf: write header")
 gguf_writer.write_header_to_file()
 print("gguf: write metadata")
 gguf_writer.write_kv_data_to_file()
-print("gguf: write tensor metadata")
-gguf_writer.write_ti_data_to_file()
-
-# tensor data
-print("gguf: convert and write tensor data")
-
-if num_parts == 0:
-    part_names = ("pytorch_model.bin",)
-else:
-    part_names = (
-        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
-    )
-
-for part_name in part_names:
-    print("gguf: loading model part '" + part_name + "'")
-    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
-
-    for name in model_part.keys():
-        data = model_part[name]
-
-        old_dtype = data.dtype
-
-        # we don't need these
-        if name.endswith(".attention.masked_bias") or name.endswith(".attention.bias") or name.endswith(".attention.rotary_emb.inv_freq"):
-            continue
-
-        # convert any unsupported data types to float32
-        if data.dtype != torch.float16 and data.dtype != torch.float32:
-            data = data.to(torch.float32)
-
-        data = data.squeeze().numpy()
-
-        # map tensor names
-        if name.endswith(".weight") and name[:-7] in tensor_map:
-            name = tensor_map[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tensor_map:
-            name = tensor_map[name[:-5]] + ".bias"
-        else:
-            print( "Can not map tensor '" + name + "'" )
-            sys.exit()
-
-        n_dims = len(data.shape)
-        data_dtype = data.dtype
-
-        # if f32 desired, convert any float16 to float32
-        if ftype == 0 and data.dtype == np.float16:
-            data = data.astype(np.float32)
-
-        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
-        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
-            data = data.astype(np.float32)
-
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-            data = data.astype(np.float16)
-
-        print( name + ", shape " + str(len(data.shape)) + ", " + str(old_dtype) + " --> " + str(data.dtype))
-
-        gguf_writer.write_tensor_to_file(data)
+print("gguf: write tensors")
+gguf_writer.write_tensors_to_file()
 
 gguf_writer.close()
 
-
-print("gguf: model successfully exported to '" + fname_out + "'" )
+print("gguf: model successfully exported to '" + fname_out + "'")
 print("")
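Taken together, the commit collapses the old two-pass export (write tensor infos up front, then re-read every model part and stream the data) into a single pass: add_tensor() buffers each converted array, and write_tensors_to_file() emits the infos and data after the header and KV metadata. A minimal sketch of the resulting write order, assuming the gguf writer API used above (file name, tensor name, and shape are illustrative):

    import gguf
    import numpy as np

    writer = gguf.GGUFWriter("tiny.gguf", arch="gptneox")
    writer.add_architecture()
    writer.add_name("tiny-example")

    # add_tensor() only buffers; nothing hits disk yet
    writer.add_tensor("token_embd.weight", np.zeros((4, 8), dtype=np.float32))

    writer.write_header_to_file()     # magic, version, tensor/kv counts
    writer.write_kv_data_to_file()    # metadata key/value pairs
    writer.write_tensors_to_file()    # tensor infos + tensor data in one pass
    writer.close()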