@@ -438,6 +438,11 @@ struct llama_file {
         read_raw(&ret, sizeof(ret));
         return ret;
     }
+    std::float_t read_f32() {
+        std::float_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
 
     std::string read_string(std::uint32_t len) {
         std::vector<char> chars(len);
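Note on the read_f32 helper added above: std::float_t (from <cmath>) is only required to be at least as wide as float, so reading sizeof(std::float_t) bytes assumes it is exactly a 4-byte float, which holds on the usual x86-64/ARM64 targets. A stricter variant, sketched here as a hypothetical member of the same llama_file struct (not part of this change), would pin the width explicitly:

    float read_f32() {
        static_assert(sizeof(float) == 4, "expects 4-byte floats");
        float ret;
        read_raw(&ret, sizeof(ret)); // same raw-read helper used by read_u32 above
        return ret;
    }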
@@ -491,6 +496,59 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
     file->write_raw(tensor->data, ggml_nbytes(tensor));
 }
 
+bool is_ggml_file(const char *filename) {
+    llama_file file(filename, "rb");
+    if (file.size < 4) {
+        return false;
+    }
+    uint32_t magic = file.read_u32();
+    return magic == LLAMA_FILE_MAGIC;
+}
+
+void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) {
+    // heuristic to infer whether vocab is from ggml or from llama2.c vocabulary
+    if (is_ggml_file(filename)) {
+
+        struct llama_context_params llama_params = llama_context_default_params();
+        llama_params.vocab_only = true;
+
+        struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params);
+        struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
+
+        std::vector<const char *> strings;
+        std::vector<float> scores;
+        int n_vocab = llama_n_vocab(lctx);
+        strings.resize(n_vocab, NULL);
+        scores.resize(n_vocab, 0);
+        n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
+        GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
+        vocab->id_to_token.resize(n_vocab);
+        for (int i=0; i<n_vocab; ++i) {
+            std::string tok = std::string(strings[i]);
+            float score = scores[i];
+            vocab->id_to_token[i].tok = tok;
+            vocab->id_to_token[i].score = score;
+            vocab->token_to_id.emplace(tok, i);
+        }
+        llama_free(lctx);
+        llama_free_model(lmodel);
+    } else { // assume llama2.c vocabulary
+        printf("Assuming llama2.c vocabulary since %s is not a ggml file\n", filename);
+        llama_file file(filename, "rb");
+        uint32_t n_vocab = config->vocab_size;
+        /* uint32_t max_token_length = */ file.read_u32(); // unused
+        vocab->id_to_token.resize(n_vocab);
+        for (uint32_t i=0; i<n_vocab; ++i) {
+            float_t score = file.read_f32();
+            uint32_t len = file.read_u32();
+            std::string tok = file.read_string(len);
+            vocab->id_to_token[i].tok = tok;
+            vocab->id_to_token[i].score = score;
+            vocab->token_to_id.emplace(tok, i);
+        }
+    }
+}
+
 void stuff_karpathy_weights_into_gg(struct ggml_tensor * gg_weights, float * karpathy_weights) {
     int ct;
     switch (gg_weights->n_dims) {
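The else branch of load_vocab above parses the llama2.c tokenizer file exactly as it is laid out on disk: a uint32 max_token_length header, followed by config->vocab_size records of (float score, uint32 length, then length raw token bytes). For reference, a self-contained sketch of the same parse using plain stdio (illustrative only; the TokenEntry and read_llama2c_vocab names are hypothetical and not part of this patch):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct TokenEntry { std::string tok; float score; };

    // Reads a llama2.c tokenizer file, mirroring the load_vocab else-branch above.
    static std::vector<TokenEntry> read_llama2c_vocab(const char * path, uint32_t n_vocab) {
        std::vector<TokenEntry> out(n_vocab);
        FILE * f = std::fopen(path, "rb");
        if (!f) { return {}; }
        uint32_t max_token_length = 0;
        std::fread(&max_token_length, sizeof(max_token_length), 1, f); // header, not needed for conversion
        for (uint32_t i = 0; i < n_vocab; ++i) {
            float score = 0.0f;
            uint32_t len = 0;
            std::fread(&score, sizeof(score), 1, f);
            std::fread(&len, sizeof(len), 1, f);
            std::string tok(len, '\0');
            if (len > 0) { std::fread(&tok[0], 1, len, f); }
            out[i].tok = tok;
            out[i].score = score;
        }
        std::fclose(f);
        return out;
    }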
@@ -658,7 +716,7 @@ void print_usage(int /*argc*/, char ** argv, const struct train_params * params)
     fprintf(stderr, "\n");
     fprintf(stderr, "options:\n");
     fprintf(stderr, "  -h, --help                       show this help message and exit\n");
-    fprintf(stderr, "  --copy-vocab-from-model FNAME    model path from which to copy vocab (default '%s')\n", params->fn_vocab_model);
+    fprintf(stderr, "  --copy-vocab-from-model FNAME    llama2.c vocabulary or ggml model path from which to copy vocab (default '%s')\n", params->fn_vocab_model);
     fprintf(stderr, "  --llama2c-model FNAME            [REQUIRED] model path from which to load Karpathy's llama2.c model\n");
     fprintf(stderr, "  --llama2c-output-model FNAME     model path to save the converted llama2.c model (default %s')\n", params->fn_llama2c_output_model);
     fprintf(stderr, "\n");
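The updated --copy-vocab-from-model help text above reflects that the flag now accepts either a llama2.c tokenizer file or a ggml model. A hypothetical invocation of each mode (binary and file names are placeholders, not taken from this diff):

    ./convert-llama2c-to-ggml --copy-vocab-from-model tokenizer.bin      --llama2c-model stories42M.bin --llama2c-output-model stories42M.ggml.bin
    ./convert-llama2c-to-ggml --copy-vocab-from-model ggml-model-f16.bin --llama2c-model stories42M.bin --llama2c-output-model stories42M.ggml.bin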
@@ -737,30 +795,9 @@ int main(int argc, char ** argv) {
         fclose(file);
     }
 
-    struct llama_context_params llama_params = llama_context_default_params();
-    llama_params.vocab_only = true;
-
-    struct llama_model * lmodel = llama_load_model_from_file(params.fn_vocab_model, llama_params);
-    struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
-
     struct llama_vocab vocab;
-    {
-        std::vector<const char *> strings;
-        std::vector<float> scores;
-        int n_vocab = llama_n_vocab(lctx);
-        strings.resize(n_vocab, NULL);
-        scores.resize(n_vocab, 0);
-        n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
-        GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
-        vocab.id_to_token.resize(n_vocab);
-        for (int i=0; i<n_vocab; ++i) {
-            std::string tok = std::string(strings[i]);
-            float score = scores[i];
-            vocab.id_to_token[i].tok = tok;
-            vocab.id_to_token[i].score = score;
-            vocab.token_to_id.emplace(tok, i);
-        }
-    }
+    load_vocab(params.fn_vocab_model, &config, &vocab);
+
     struct my_llama_model model;
     model.hparams.n_vocab = config.vocab_size; // llama_n_vocab(lctx);
     model.hparams.n_ctx   = params.n_ctx;
@@ -782,8 +819,6 @@ int main(int argc, char ** argv) {
 
     printf("Saving llama.c model file %s in ggml format at %s\n", params.fn_llama2c_model, params.fn_llama2c_output_model);
 
-    llama_free(lctx);
-    llama_free_model(lmodel);
     ggml_free(model.ctx);
     free_weights(&weights);
     return 0;