diff --git a/common/sampling.cpp b/common/sampling.cpp index 37a0d9c85ae30..6919bb641629a 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -190,8 +190,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co if (params.mirostat == 0) { if (params.top_n_sigma >= 0) { llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k)); - llama_sampler_chain_add(result->chain, llama_sampler_init_temp (params.temp)); + llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent)); llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma (params.top_n_sigma)); + llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); } else { for (const auto & cnstr : params.samplers) { switch (cnstr) { diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 9ffec0a64ad94..a64fdec166a8c 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -145,6 +145,7 @@ struct slot_params { {"top_k", sampling.top_k}, {"top_p", sampling.top_p}, {"min_p", sampling.min_p}, + {"top_n_sigma", sampling.top_n_sigma}, {"xtc_probability", sampling.xtc_probability}, {"xtc_threshold", sampling.xtc_threshold}, {"typical_p", sampling.typ_p}, @@ -247,6 +248,7 @@ struct server_task { params.sampling.top_k = json_value(data, "top_k", defaults.sampling.top_k); params.sampling.top_p = json_value(data, "top_p", defaults.sampling.top_p); params.sampling.min_p = json_value(data, "min_p", defaults.sampling.min_p); + params.sampling.top_n_sigma = json_value(data, "top_n_sigma", defaults.sampling.top_n_sigma); params.sampling.xtc_probability = json_value(data, "xtc_probability", defaults.sampling.xtc_probability); params.sampling.xtc_threshold = json_value(data, "xtc_threshold", defaults.sampling.xtc_threshold); params.sampling.typ_p = json_value(data, "typical_p", defaults.sampling.typ_p);