@@ -190,8 +190,10 @@ struct llama_hparams {
     uint32_t n_head  = 32;
     uint32_t n_layer = 32;
     uint32_t n_rot   = 64;
+
     float rope_freq_base  = 10000.0f;
     float rope_freq_scale = 1.0f;
+
     enum llama_ftype ftype = LLAMA_FTYPE_MOSTLY_F16;

     bool operator!=(const llama_hparams & other) const {
@@ -843,12 +845,12 @@ struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
         /*.seed                        =*/ LLAMA_DEFAULT_SEED,
         /*.n_ctx                       =*/ 512,
-        /*.rope_freq_base              =*/ 10000.0f,
-        /*.rope_freq_scale             =*/ 1.0f,
         /*.n_batch                     =*/ 512,
         /*.gpu_layers                  =*/ 0,
         /*.main_gpu                    =*/ 0,
         /*.tensor_split                =*/ {0},
+        /*.rope_freq_base              =*/ 10000.0f,
+        /*.rope_freq_scale             =*/ 1.0f,
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
         /*.low_vram                    =*/ false,
@@ -968,12 +970,12 @@ static void llama_model_load_internal(
         llama_model & model,
         llama_vocab & vocab,
         int n_ctx,
-        float rope_freq_base,
-        float rope_freq_scale,
         int n_batch,
         int n_gpu_layers,
         int main_gpu,
         const float * tensor_split,
+        float rope_freq_base,
+        float rope_freq_scale,
         bool low_vram,
         ggml_type memory_type,
         bool use_mmap,
@@ -1008,26 +1010,27 @@ static void llama_model_load_internal(
         }

         hparams.n_ctx = n_ctx;
-        hparams.rope_freq_base = rope_freq_base;
+
+        hparams.rope_freq_base  = rope_freq_base;
         hparams.rope_freq_scale = rope_freq_scale;
     }

     const uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;

     {
-        fprintf(stderr, "%s: format     = %s\n",   __func__, llama_file_version_name(file_version));
-        fprintf(stderr, "%s: n_vocab    = %u\n",   __func__, hparams.n_vocab);
-        fprintf(stderr, "%s: n_ctx      = %u\n",   __func__, hparams.n_ctx);
-        fprintf(stderr, "%s: n_embd     = %u\n",   __func__, hparams.n_embd);
-        fprintf(stderr, "%s: n_mult     = %u\n",   __func__, hparams.n_mult);
-        fprintf(stderr, "%s: n_head     = %u\n",   __func__, hparams.n_head);
-        fprintf(stderr, "%s: n_layer    = %u\n",   __func__, hparams.n_layer);
-        fprintf(stderr, "%s: n_rot      = %u\n",   __func__, hparams.n_rot);
-        fprintf(stderr, "%s: freq_base  = %.1f\n", __func__, hparams.rope_freq_base);
-        fprintf(stderr, "%s: freq_scale = %g\n",   __func__, hparams.rope_freq_scale);
+        fprintf(stderr, "%s: format      = %s\n",   __func__, llama_file_version_name(file_version));
+        fprintf(stderr, "%s: n_vocab     = %u\n",   __func__, hparams.n_vocab);
+        fprintf(stderr, "%s: n_ctx       = %u\n",   __func__, hparams.n_ctx);
+        fprintf(stderr, "%s: n_embd      = %u\n",   __func__, hparams.n_embd);
+        fprintf(stderr, "%s: n_mult      = %u\n",   __func__, hparams.n_mult);
+        fprintf(stderr, "%s: n_head      = %u\n",   __func__, hparams.n_head);
+        fprintf(stderr, "%s: n_layer     = %u\n",   __func__, hparams.n_layer);
+        fprintf(stderr, "%s: n_rot       = %u\n",   __func__, hparams.n_rot);
+        fprintf(stderr, "%s: freq_base   = %.1f\n", __func__, hparams.rope_freq_base);
+        fprintf(stderr, "%s: freq_scale  = %g\n",   __func__, hparams.rope_freq_scale);
         fprintf(stderr, "%s: ftype       = %u (%s)\n", __func__, hparams.ftype, llama_ftype_name(hparams.ftype));
-        fprintf(stderr, "%s: n_ff       = %u\n",   __func__, n_ff);
-        fprintf(stderr, "%s: model size = %s\n",   __func__, llama_model_type_name(model.type));
+        fprintf(stderr, "%s: n_ff        = %u\n",   __func__, n_ff);
+        fprintf(stderr, "%s: model size  = %s\n",   __func__, llama_model_type_name(model.type));
     }

     if (file_version < LLAMA_FILE_VERSION_GGJT_V2) {
@@ -1278,12 +1281,12 @@ static bool llama_model_load(
         llama_model & model,
         llama_vocab & vocab,
         int n_ctx,
-        float rope_freq_base,
-        float rope_freq_scale,
         int n_batch,
         int n_gpu_layers,
         int main_gpu,
         float * tensor_split,
+        float rope_freq_base,
+        float rope_freq_scale,
         bool low_vram,
         ggml_type memory_type,
         bool use_mmap,
@@ -1292,7 +1295,7 @@ static bool llama_model_load(
         llama_progress_callback progress_callback,
         void *progress_callback_user_data) {
     try {
-        llama_model_load_internal(fname, model, vocab, n_ctx, rope_freq_base, rope_freq_scale, n_batch, n_gpu_layers, main_gpu, tensor_split, low_vram, memory_type,
+        llama_model_load_internal(fname, model, vocab, n_ctx, n_batch, n_gpu_layers, main_gpu, tensor_split, rope_freq_base, rope_freq_scale, low_vram, memory_type,
                 use_mmap, use_mlock, vocab_only, progress_callback, progress_callback_user_data);
         return true;
     } catch (const std::exception & err) {
@@ -1342,9 +1345,10 @@ static bool llama_eval_internal(
     const int n_head       = hparams.n_head;
     const int n_vocab      = hparams.n_vocab;
     const int n_rot        = hparams.n_embd/hparams.n_head;
+    const int n_gpu_layers = model.n_gpu_layers;
+
     const float freq_base  = hparams.rope_freq_base;
     const float freq_scale = hparams.rope_freq_scale;
-    const int n_gpu_layers = model.n_gpu_layers;

     auto & mem_per_token = lctx.mem_per_token;
     auto & buf_compute   = lctx.buf_compute;
@@ -2689,9 +2693,9 @@ struct llama_model * llama_load_model_from_file(

     ggml_type memory_type = params.f16_kv ? GGML_TYPE_F16 : GGML_TYPE_F32;

-    if (!llama_model_load(path_model, *model, model->vocab, params.n_ctx, params.rope_freq_base, params.rope_freq_scale,
-            params.n_batch, params.n_gpu_layers, params.main_gpu, params.tensor_split, params.low_vram, memory_type,
-            params.use_mmap, params.use_mlock, params.vocab_only, params.progress_callback,
+    if (!llama_model_load(path_model, *model, model->vocab, params.n_ctx, params.n_batch, params.n_gpu_layers,
+            params.main_gpu, params.tensor_split, params.rope_freq_base, params.rope_freq_scale, params.low_vram,
+            memory_type, params.use_mmap, params.use_mlock, params.vocab_only, params.progress_callback,
             params.progress_callback_user_data)) {
         delete model;
         fprintf(stderr, "%s: failed to load model\n", __func__);
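For context, a minimal caller sketch of the public API touched by this change (the model path and the non-default freq_scale are hypothetical, chosen only for illustration). Because callers take the defaults from llama_context_default_params() and then assign fields by name, moving rope_freq_base/rope_freq_scale after tensor_split inside llama_context_params is transparent to them; only code that initializes the struct positionally would notice:

    #include "llama.h"
    #include <stdio.h>

    int main(void) {
        // Take the defaults, then override the RoPE parameters by name;
        // the field order inside llama_context_params does not matter here.
        struct llama_context_params params = llama_context_default_params();
        params.rope_freq_base  = 10000.0f; // default base frequency
        params.rope_freq_scale = 0.5f;     // hypothetical: 2x linear context extension

        // Placeholder path; any GGML model file of this era would do.
        struct llama_model * model = llama_load_model_from_file("models/7B/ggml-model.bin", params);
        if (model == NULL) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        llama_free_model(model);
        return 0;
    }

The internal functions are a different story: llama_model_load_internal and llama_model_load pass these values positionally, which is why every call site had to be updated in the same commit that reordered their parameter lists.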