@@ -107,7 +107,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
107107 std::string arg;
108108 gpt_params default_params;
109109 const std::string arg_prefix = " --" ;
110- llama_sampling_params & sparams = params.sampling_params ;
110+ llama_sampling_params & sparams = params.sparams ;
111111
112112 for (int i = 1 ; i < argc; i++) {
113113 arg = argv[i];
@@ -241,25 +241,26 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
241241 invalid_param = true ;
242242 break ;
243243 }
244- sparams.repeat_last_n = std::stoi (argv[i]);
244+ sparams.penalty_last_n = std::stoi (argv[i]);
245+ sparams.n_prev = std::max (sparams.n_prev , sparams.penalty_last_n );
245246 } else if (arg == " --repeat-penalty" ) {
246247 if (++i >= argc) {
247248 invalid_param = true ;
248249 break ;
249250 }
250- sparams.repeat_penalty = std::stof (argv[i]);
251+ sparams.penalty_repeat = std::stof (argv[i]);
251252 } else if (arg == " --frequency-penalty" ) {
252253 if (++i >= argc) {
253254 invalid_param = true ;
254255 break ;
255256 }
256- sparams.frequency_penalty = std::stof (argv[i]);
257+ sparams.penalty_freq = std::stof (argv[i]);
257258 } else if (arg == " --presence-penalty" ) {
258259 if (++i >= argc) {
259260 invalid_param = true ;
260261 break ;
261262 }
262- sparams.presence_penalty = std::stof (argv[i]);
263+ sparams.penalty_present = std::stof (argv[i]);
263264 } else if (arg == " --mirostat" ) {
264265 if (++i >= argc) {
265266 invalid_param = true ;
@@ -572,7 +573,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
572573 invalid_param = true ;
573574 break ;
574575 }
575- params .grammar = argv[i];
576+ sparams .grammar = argv[i];
576577 } else if (arg == " --grammar-file" ) {
577578 if (++i >= argc) {
578579 invalid_param = true ;
@@ -587,7 +588,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
587588 std::copy (
588589 std::istreambuf_iterator<char >(file),
589590 std::istreambuf_iterator<char >(),
590- std::back_inserter (params .grammar )
591+ std::back_inserter (sparams .grammar )
591592 );
592593#ifndef LOG_DISABLE_LOGS
593594 // Parse args for logging parameters
@@ -640,7 +641,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
640641}
641642
642643void gpt_print_usage (int /* argc*/ , char ** argv, const gpt_params & params) {
643- const llama_sampling_params & sparams = params.sampling_params ;
644+ const llama_sampling_params & sparams = params.sparams ;
644645
645646 printf (" usage: %s [options]\n " , argv[0 ]);
646647 printf (" \n " );
@@ -678,10 +679,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
678679 printf (" --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n " , (double )sparams.top_p );
679680 printf (" --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n " , (double )sparams.tfs_z );
680681 printf (" --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n " , (double )sparams.typical_p );
681- printf (" --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n " , sparams.repeat_last_n );
682- printf (" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n " , (double )sparams.repeat_penalty );
683- printf (" --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.presence_penalty );
684- printf (" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.frequency_penalty );
682+ printf (" --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n " , sparams.penalty_last_n );
683+ printf (" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n " , (double )sparams.penalty_repeat );
684+ printf (" --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.penalty_present );
685+ printf (" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.penalty_freq );
685686 printf (" --mirostat N use Mirostat sampling.\n " );
686687 printf (" Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n " );
687688 printf (" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n " , sparams.mirostat );
@@ -878,7 +879,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
878879 }
879880
880881 if (params.ignore_eos ) {
881- params.sampling_params .logit_bias [llama_token_eos (lctx)] = -INFINITY;
882+ params.sparams .logit_bias [llama_token_eos (lctx)] = -INFINITY;
882883 }
883884
884885 {
@@ -1123,28 +1124,28 @@ std::string get_sortable_timestamp() {
11231124
11241125void dump_non_result_info_yaml (FILE * stream, const gpt_params & params, const llama_context * lctx,
11251126 const std::string & timestamp, const std::vector<int > & prompt_tokens, const char * model_desc) {
1126- const llama_sampling_params & sparams = params.sampling_params ;
1127+ const llama_sampling_params & sparams = params.sparams ;
11271128
11281129 fprintf (stream, " build_commit: %s\n " , BUILD_COMMIT);
11291130 fprintf (stream, " build_number: %d\n " , BUILD_NUMBER);
1130- fprintf (stream, " cpu_has_arm_fma: %s\n " , ggml_cpu_has_arm_fma () ? " true" : " false" );
1131- fprintf (stream, " cpu_has_avx: %s\n " , ggml_cpu_has_avx () ? " true" : " false" );
1132- fprintf (stream, " cpu_has_avx2: %s\n " , ggml_cpu_has_avx2 () ? " true" : " false" );
1133- fprintf (stream, " cpu_has_avx512: %s\n " , ggml_cpu_has_avx512 () ? " true" : " false" );
1131+ fprintf (stream, " cpu_has_arm_fma: %s\n " , ggml_cpu_has_arm_fma () ? " true" : " false" );
1132+ fprintf (stream, " cpu_has_avx: %s\n " , ggml_cpu_has_avx () ? " true" : " false" );
1133+ fprintf (stream, " cpu_has_avx2: %s\n " , ggml_cpu_has_avx2 () ? " true" : " false" );
1134+ fprintf (stream, " cpu_has_avx512: %s\n " , ggml_cpu_has_avx512 () ? " true" : " false" );
11341135 fprintf (stream, " cpu_has_avx512_vbmi: %s\n " , ggml_cpu_has_avx512_vbmi () ? " true" : " false" );
11351136 fprintf (stream, " cpu_has_avx512_vnni: %s\n " , ggml_cpu_has_avx512_vnni () ? " true" : " false" );
1136- fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1137- fprintf (stream, " cpu_has_cublas: %s\n " , ggml_cpu_has_cublas () ? " true" : " false" );
1138- fprintf (stream, " cpu_has_clblast: %s\n " , ggml_cpu_has_clblast () ? " true" : " false" );
1139- fprintf (stream, " cpu_has_fma: %s\n " , ggml_cpu_has_fma () ? " true" : " false" );
1140- fprintf (stream, " cpu_has_gpublas: %s\n " , ggml_cpu_has_gpublas () ? " true" : " false" );
1141- fprintf (stream, " cpu_has_neon: %s\n " , ggml_cpu_has_neon () ? " true" : " false" );
1142- fprintf (stream, " cpu_has_f16c: %s\n " , ggml_cpu_has_f16c () ? " true" : " false" );
1143- fprintf (stream, " cpu_has_fp16_va: %s\n " , ggml_cpu_has_fp16_va () ? " true" : " false" );
1144- fprintf (stream, " cpu_has_wasm_simd: %s\n " , ggml_cpu_has_wasm_simd () ? " true" : " false" );
1145- fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1146- fprintf (stream, " cpu_has_sse3: %s\n " , ggml_cpu_has_sse3 () ? " true" : " false" );
1147- fprintf (stream, " cpu_has_vsx: %s\n " , ggml_cpu_has_vsx () ? " true" : " false" );
1137+ fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1138+ fprintf (stream, " cpu_has_cublas: %s\n " , ggml_cpu_has_cublas () ? " true" : " false" );
1139+ fprintf (stream, " cpu_has_clblast: %s\n " , ggml_cpu_has_clblast () ? " true" : " false" );
1140+ fprintf (stream, " cpu_has_fma: %s\n " , ggml_cpu_has_fma () ? " true" : " false" );
1141+ fprintf (stream, " cpu_has_gpublas: %s\n " , ggml_cpu_has_gpublas () ? " true" : " false" );
1142+ fprintf (stream, " cpu_has_neon: %s\n " , ggml_cpu_has_neon () ? " true" : " false" );
1143+ fprintf (stream, " cpu_has_f16c: %s\n " , ggml_cpu_has_f16c () ? " true" : " false" );
1144+ fprintf (stream, " cpu_has_fp16_va: %s\n " , ggml_cpu_has_fp16_va () ? " true" : " false" );
1145+ fprintf (stream, " cpu_has_wasm_simd: %s\n " , ggml_cpu_has_wasm_simd () ? " true" : " false" );
1146+ fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1147+ fprintf (stream, " cpu_has_sse3: %s\n " , ggml_cpu_has_sse3 () ? " true" : " false" );
1148+ fprintf (stream, " cpu_has_vsx: %s\n " , ggml_cpu_has_vsx () ? " true" : " false" );
11481149
11491150#ifdef NDEBUG
11501151 fprintf (stream, " debug: false\n " );
@@ -1178,8 +1179,8 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
11781179 fprintf (stream, " ctx_size: %d # default: 512\n " , params.n_ctx );
11791180 fprintf (stream, " escape: %s # default: false\n " , params.escape ? " true" : " false" );
11801181 fprintf (stream, " file: # never logged, see prompt instead. Can still be specified for input.\n " );
1181- fprintf (stream, " frequency_penalty: %f # default: 0.0 \n " , sparams.frequency_penalty );
1182- dump_string_yaml_multiline (stream, " grammar" , params .grammar .c_str ());
1182+ fprintf (stream, " frequency_penalty: %f # default: 0.0 \n " , sparams.penalty_freq );
1183+ dump_string_yaml_multiline (stream, " grammar" , sparams .grammar .c_str ());
11831184 fprintf (stream, " grammar-file: # never logged, see grammar instead. Can still be specified for input.\n " );
11841185 fprintf (stream, " hellaswag: %s # default: false\n " , params.hellaswag ? " true" : " false" );
11851186 fprintf (stream, " hellaswag_tasks: %zu # default: 400\n " , params.hellaswag_tasks );
@@ -1238,14 +1239,14 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
12381239 fprintf (stream, " numa: %s # default: false\n " , params.numa ? " true" : " false" );
12391240 fprintf (stream, " ppl_output_type: %d # default: 0\n " , params.ppl_output_type );
12401241 fprintf (stream, " ppl_stride: %d # default: 0\n " , params.ppl_stride );
1241- fprintf (stream, " presence_penalty: %f # default: 0.0\n " , sparams.presence_penalty );
1242+ fprintf (stream, " presence_penalty: %f # default: 0.0\n " , sparams.penalty_present );
12421243 dump_string_yaml_multiline (stream, " prompt" , params.prompt .c_str ());
12431244 fprintf (stream, " prompt_cache: %s\n " , params.path_prompt_cache .c_str ());
12441245 fprintf (stream, " prompt_cache_all: %s # default: false\n " , params.prompt_cache_all ? " true" : " false" );
12451246 fprintf (stream, " prompt_cache_ro: %s # default: false\n " , params.prompt_cache_ro ? " true" : " false" );
12461247 dump_vector_int_yaml (stream, " prompt_tokens" , prompt_tokens);
12471248 fprintf (stream, " random_prompt: %s # default: false\n " , params.random_prompt ? " true" : " false" );
1248- fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , sparams.repeat_penalty );
1249+ fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , sparams.penalty_repeat );
12491250
12501251 fprintf (stream, " reverse_prompt:\n " );
12511252 for (std::string ap : params.antiprompt ) {
0 commit comments