From 1bdb6034b0b2f499a6eb889265b7adb1ad734c90 Mon Sep 17 00:00:00 2001 From: CasualAutopsy Date: Sat, 15 Feb 2025 16:52:55 -0500 Subject: [PATCH 1/8] =?UTF-8?q?sampling:=20add=20Top-n=CF=83=20sampler=20t?= =?UTF-8?q?o=20`llama-server`=20and=20sampler=20ordering?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- common/common.h | 2 + common/sampling.cpp | 103 +++++++++++++++++++++---------------- examples/server/server.cpp | 2 + 3 files changed, 63 insertions(+), 44 deletions(-) diff --git a/common/common.h b/common/common.h index 98b9a4464787a..33a92af09ffe5 100644 --- a/common/common.h +++ b/common/common.h @@ -96,6 +96,7 @@ enum common_sampler_type { COMMON_SAMPLER_TYPE_XTC = 8, COMMON_SAMPLER_TYPE_INFILL = 9, COMMON_SAMPLER_TYPE_PENALTIES = 10, + COMMON_SAMPLER_TYPE_N_SIGMA = 11, }; // dimensionality reduction methods, used by cvector-generator @@ -157,6 +158,7 @@ struct common_params_sampling { COMMON_SAMPLER_TYPE_TYPICAL_P, COMMON_SAMPLER_TYPE_TOP_P, COMMON_SAMPLER_TYPE_MIN_P, + COMMON_SAMPLER_TYPE_N_SIGMA, COMMON_SAMPLER_TYPE_XTC, COMMON_SAMPLER_TYPE_TEMPERATURE, }; diff --git a/common/sampling.cpp b/common/sampling.cpp index 37a0d9c85ae30..af74c086b2fea 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -188,53 +188,63 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co params.logit_bias.data())); if (params.mirostat == 0) { - if (params.top_n_sigma >= 0) { - llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k)); - llama_sampler_chain_add(result->chain, llama_sampler_init_temp (params.temp)); - llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma (params.top_n_sigma)); - } else { - for (const auto & cnstr : params.samplers) { - switch (cnstr) { - case COMMON_SAMPLER_TYPE_DRY: - { - std::vector c_breakers; - c_breakers.reserve(params.dry_sequence_breakers.size()); - for (const auto & str : params.dry_sequence_breakers) { - c_breakers.push_back(str.c_str()); - } - - llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); + for (const auto & cnstr : params.samplers) { + switch (cnstr) { + case COMMON_SAMPLER_TYPE_DRY: + { + std::vector c_breakers; + c_breakers.reserve(params.dry_sequence_breakers.size()); + for (const auto & str : params.dry_sequence_breakers) { + c_breakers.push_back(str.c_str()); } - break; - case COMMON_SAMPLER_TYPE_TOP_K: - llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k)); - break; - case COMMON_SAMPLER_TYPE_TOP_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_top_p (params.top_p, params.min_keep)); - break; - case COMMON_SAMPLER_TYPE_MIN_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep)); - break; - case COMMON_SAMPLER_TYPE_XTC: - llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); - break; - case COMMON_SAMPLER_TYPE_TYPICAL_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep)); - break; - case COMMON_SAMPLER_TYPE_TEMPERATURE: - llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent)); - break; - case COMMON_SAMPLER_TYPE_INFILL: - llama_sampler_chain_add(result->chain, llama_sampler_init_infill (vocab)); - break; - case COMMON_SAMPLER_TYPE_PENALTIES: - llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); - break; - default: - GGML_ASSERT(false && "unknown sampler type"); - } + + llama_sampler_chain_add( + result->chain, + llama_sampler_init_dry(vocab, llama_model_n_ctx_train(model), params.dry_multiplier, + params.dry_base, params.dry_allowed_length, + params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); + } + break; + case COMMON_SAMPLER_TYPE_TOP_K: + llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.top_k)); + break; + case COMMON_SAMPLER_TYPE_TOP_P: + llama_sampler_chain_add(result->chain, llama_sampler_init_top_p(params.top_p, params.min_keep)); + break; + case COMMON_SAMPLER_TYPE_MIN_P: + llama_sampler_chain_add(result->chain, llama_sampler_init_min_p(params.min_p, params.min_keep)); + break; + case COMMON_SAMPLER_TYPE_N_SIGMA: + if (params.top_n_sigma >= 0) { + llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma(params.top_n_sigma)); + } + break; + case COMMON_SAMPLER_TYPE_XTC: + llama_sampler_chain_add(result->chain, + llama_sampler_init_xtc(params.xtc_probability, params.xtc_threshold, + params.min_keep, params.seed)); + break; + case COMMON_SAMPLER_TYPE_TYPICAL_P: + llama_sampler_chain_add(result->chain, llama_sampler_init_typical(params.typ_p, params.min_keep)); + break; + case COMMON_SAMPLER_TYPE_TEMPERATURE: + llama_sampler_chain_add( + result->chain, + llama_sampler_init_temp_ext(params.temp, params.dynatemp_range, params.dynatemp_exponent)); + break; + case COMMON_SAMPLER_TYPE_INFILL: + llama_sampler_chain_add(result->chain, llama_sampler_init_infill(vocab)); + break; + case COMMON_SAMPLER_TYPE_PENALTIES: + llama_sampler_chain_add(result->chain, + llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, + params.penalty_freq, params.penalty_present)); + break; + default: + GGML_ASSERT(false && "unknown sampler type"); } } + llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed)); } else if (params.mirostat == 1) { llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp)); @@ -435,6 +445,7 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) { case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y'; case COMMON_SAMPLER_TYPE_TOP_P: return 'p'; case COMMON_SAMPLER_TYPE_MIN_P: return 'm'; + case COMMON_SAMPLER_TYPE_N_SIGMA: return 'n'; case COMMON_SAMPLER_TYPE_TEMPERATURE: return 't'; case COMMON_SAMPLER_TYPE_XTC: return 'x'; case COMMON_SAMPLER_TYPE_INFILL: return 'i'; @@ -450,6 +461,7 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) { case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p"; case COMMON_SAMPLER_TYPE_TOP_P: return "top_p"; case COMMON_SAMPLER_TYPE_MIN_P: return "min_p"; + case COMMON_SAMPLER_TYPE_N_SIGMA: return "top_n_sigma"; case COMMON_SAMPLER_TYPE_TEMPERATURE: return "temperature"; case COMMON_SAMPLER_TYPE_XTC: return "xtc"; case COMMON_SAMPLER_TYPE_INFILL: return "infill"; @@ -465,6 +477,7 @@ std::vector common_sampler_types_from_names(const std::vect { "top_p", COMMON_SAMPLER_TYPE_TOP_P }, { "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P }, { "min_p", COMMON_SAMPLER_TYPE_MIN_P }, + { "top_n_sigma", COMMON_SAMPLER_TYPE_N_SIGMA }, { "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE }, { "xtc", COMMON_SAMPLER_TYPE_XTC }, { "infill", COMMON_SAMPLER_TYPE_INFILL }, @@ -482,6 +495,7 @@ std::vector common_sampler_types_from_names(const std::vect { "typ-p", COMMON_SAMPLER_TYPE_TYPICAL_P }, { "typ", COMMON_SAMPLER_TYPE_TYPICAL_P }, { "min-p", COMMON_SAMPLER_TYPE_MIN_P }, + { "nsigma", COMMON_SAMPLER_TYPE_N_SIGMA }, { "temp", COMMON_SAMPLER_TYPE_TEMPERATURE }, }; @@ -512,6 +526,7 @@ std::vector common_sampler_types_from_chars(const std::stri { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P), COMMON_SAMPLER_TYPE_TOP_P }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P }, + { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_N_SIGMA), COMMON_SAMPLER_TYPE_N_SIGMA }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL), COMMON_SAMPLER_TYPE_INFILL }, diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 9ffec0a64ad94..a64fdec166a8c 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -145,6 +145,7 @@ struct slot_params { {"top_k", sampling.top_k}, {"top_p", sampling.top_p}, {"min_p", sampling.min_p}, + {"top_n_sigma", sampling.top_n_sigma}, {"xtc_probability", sampling.xtc_probability}, {"xtc_threshold", sampling.xtc_threshold}, {"typical_p", sampling.typ_p}, @@ -247,6 +248,7 @@ struct server_task { params.sampling.top_k = json_value(data, "top_k", defaults.sampling.top_k); params.sampling.top_p = json_value(data, "top_p", defaults.sampling.top_p); params.sampling.min_p = json_value(data, "min_p", defaults.sampling.min_p); + params.sampling.top_n_sigma = json_value(data, "top_n_sigma", defaults.sampling.top_n_sigma); params.sampling.xtc_probability = json_value(data, "xtc_probability", defaults.sampling.xtc_probability); params.sampling.xtc_threshold = json_value(data, "xtc_threshold", defaults.sampling.xtc_threshold); params.sampling.typ_p = json_value(data, "typical_p", defaults.sampling.typ_p); From ff8b61228f50a7a1bfa0b6ad1e12b5b5deff5f99 Mon Sep 17 00:00:00 2001 From: CasualAutopsy Date: Sat, 15 Feb 2025 18:13:38 -0500 Subject: [PATCH 2/8] revert: sampler ordering --- common/sampling.cpp | 105 ++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index af74c086b2fea..7cf235e2e19a7 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -188,63 +188,64 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co params.logit_bias.data())); if (params.mirostat == 0) { - for (const auto & cnstr : params.samplers) { - switch (cnstr) { - case COMMON_SAMPLER_TYPE_DRY: - { - std::vector c_breakers; - c_breakers.reserve(params.dry_sequence_breakers.size()); - for (const auto & str : params.dry_sequence_breakers) { - c_breakers.push_back(str.c_str()); + if (params.top_n_sigma >= 0) { + llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.top_k)); + llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp)); + llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma(params.top_n_sigma)); + } else { + for (const auto & cnstr : params.samplers) { + switch (cnstr) { + case COMMON_SAMPLER_TYPE_DRY: + { + std::vector c_breakers; + c_breakers.reserve(params.dry_sequence_breakers.size()); + for (const auto & str : params.dry_sequence_breakers) { + c_breakers.push_back(str.c_str()); + } + + llama_sampler_chain_add( + result->chain, llama_sampler_init_dry( + vocab, llama_model_n_ctx_train(model), params.dry_multiplier, + params.dry_base, params.dry_allowed_length, + params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); } - + break; + case COMMON_SAMPLER_TYPE_TOP_K: + llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.top_k)); + break; + case COMMON_SAMPLER_TYPE_TOP_P: + llama_sampler_chain_add(result->chain, llama_sampler_init_top_p(params.top_p, params.min_keep)); + break; + case COMMON_SAMPLER_TYPE_MIN_P: + llama_sampler_chain_add(result->chain, llama_sampler_init_min_p(params.min_p, params.min_keep)); + break; + case COMMON_SAMPLER_TYPE_XTC: + llama_sampler_chain_add(result->chain, + llama_sampler_init_xtc(params.xtc_probability, params.xtc_threshold, + params.min_keep, params.seed)); + break; + case COMMON_SAMPLER_TYPE_TYPICAL_P: + llama_sampler_chain_add(result->chain, + llama_sampler_init_typical(params.typ_p, params.min_keep)); + break; + case COMMON_SAMPLER_TYPE_TEMPERATURE: llama_sampler_chain_add( result->chain, - llama_sampler_init_dry(vocab, llama_model_n_ctx_train(model), params.dry_multiplier, - params.dry_base, params.dry_allowed_length, - params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); - } - break; - case COMMON_SAMPLER_TYPE_TOP_K: - llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.top_k)); - break; - case COMMON_SAMPLER_TYPE_TOP_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_top_p(params.top_p, params.min_keep)); - break; - case COMMON_SAMPLER_TYPE_MIN_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_min_p(params.min_p, params.min_keep)); - break; - case COMMON_SAMPLER_TYPE_N_SIGMA: - if (params.top_n_sigma >= 0) { - llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma(params.top_n_sigma)); - } - break; - case COMMON_SAMPLER_TYPE_XTC: - llama_sampler_chain_add(result->chain, - llama_sampler_init_xtc(params.xtc_probability, params.xtc_threshold, - params.min_keep, params.seed)); - break; - case COMMON_SAMPLER_TYPE_TYPICAL_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_typical(params.typ_p, params.min_keep)); - break; - case COMMON_SAMPLER_TYPE_TEMPERATURE: - llama_sampler_chain_add( - result->chain, - llama_sampler_init_temp_ext(params.temp, params.dynatemp_range, params.dynatemp_exponent)); - break; - case COMMON_SAMPLER_TYPE_INFILL: - llama_sampler_chain_add(result->chain, llama_sampler_init_infill(vocab)); - break; - case COMMON_SAMPLER_TYPE_PENALTIES: - llama_sampler_chain_add(result->chain, - llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, - params.penalty_freq, params.penalty_present)); - break; - default: - GGML_ASSERT(false && "unknown sampler type"); + llama_sampler_init_temp_ext(params.temp, params.dynatemp_range, params.dynatemp_exponent)); + break; + case COMMON_SAMPLER_TYPE_INFILL: + llama_sampler_chain_add(result->chain, llama_sampler_init_infill(vocab)); + break; + case COMMON_SAMPLER_TYPE_PENALTIES: + llama_sampler_chain_add( + result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, + params.penalty_freq, params.penalty_present)); + break; + default: + GGML_ASSERT(false && "unknown sampler type"); + } } } - llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed)); } else if (params.mirostat == 1) { llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp)); From 1dc5d84f2277020b1148dff99be07137ca11e8e1 Mon Sep 17 00:00:00 2001 From: CasualAutopsy Date: Sat, 15 Feb 2025 18:20:46 -0500 Subject: [PATCH 3/8] revert: VS' crappy auto-formatting --- common/sampling.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 7cf235e2e19a7..77218b11e2276 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -220,26 +220,19 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co llama_sampler_chain_add(result->chain, llama_sampler_init_min_p(params.min_p, params.min_keep)); break; case COMMON_SAMPLER_TYPE_XTC: - llama_sampler_chain_add(result->chain, - llama_sampler_init_xtc(params.xtc_probability, params.xtc_threshold, - params.min_keep, params.seed)); + llama_sampler_chain_add(result->chain, llama_sampler_init_xtc(params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); break; case COMMON_SAMPLER_TYPE_TYPICAL_P: - llama_sampler_chain_add(result->chain, - llama_sampler_init_typical(params.typ_p, params.min_keep)); + llama_sampler_chain_add(result->chain, llama_sampler_init_typical(params.typ_p, params.min_keep)); break; case COMMON_SAMPLER_TYPE_TEMPERATURE: - llama_sampler_chain_add( - result->chain, - llama_sampler_init_temp_ext(params.temp, params.dynatemp_range, params.dynatemp_exponent)); + llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext(params.temp, params.dynatemp_range, params.dynatemp_exponent)); break; case COMMON_SAMPLER_TYPE_INFILL: llama_sampler_chain_add(result->chain, llama_sampler_init_infill(vocab)); break; case COMMON_SAMPLER_TYPE_PENALTIES: - llama_sampler_chain_add( - result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, - params.penalty_freq, params.penalty_present)); + llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); break; default: GGML_ASSERT(false && "unknown sampler type"); From 90680686900f5c44e3493e2efe3f9c0869cb929d Mon Sep 17 00:00:00 2001 From: CasualAutopsy Date: Sat, 15 Feb 2025 18:26:56 -0500 Subject: [PATCH 4/8] revert: VS' crappy auto-formatting pt.2 --- common/sampling.cpp | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 77218b11e2276..325fae5379229 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -189,9 +189,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co if (params.mirostat == 0) { if (params.top_n_sigma >= 0) { - llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.top_k)); - llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp)); - llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma(params.top_n_sigma)); + llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k)); + llama_sampler_chain_add(result->chain, llama_sampler_init_temp (params.temp)); + llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma (params.top_n_sigma)); } else { for (const auto & cnstr : params.samplers) { switch (cnstr) { @@ -203,36 +203,32 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co c_breakers.push_back(str.c_str()); } - llama_sampler_chain_add( - result->chain, llama_sampler_init_dry( - vocab, llama_model_n_ctx_train(model), params.dry_multiplier, - params.dry_base, params.dry_allowed_length, - params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); + llama_sampler_chain_add(result->chain, llama_sampler_init_dry(vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); } break; case COMMON_SAMPLER_TYPE_TOP_K: - llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.top_k)); + llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k)); break; case COMMON_SAMPLER_TYPE_TOP_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_top_p(params.top_p, params.min_keep)); + llama_sampler_chain_add(result->chain, llama_sampler_init_top_p (params.top_p, params.min_keep)); break; case COMMON_SAMPLER_TYPE_MIN_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_min_p(params.min_p, params.min_keep)); + llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep)); break; case COMMON_SAMPLER_TYPE_XTC: - llama_sampler_chain_add(result->chain, llama_sampler_init_xtc(params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); + llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); break; case COMMON_SAMPLER_TYPE_TYPICAL_P: - llama_sampler_chain_add(result->chain, llama_sampler_init_typical(params.typ_p, params.min_keep)); + llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep)); break; case COMMON_SAMPLER_TYPE_TEMPERATURE: - llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext(params.temp, params.dynatemp_range, params.dynatemp_exponent)); + llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent)); break; case COMMON_SAMPLER_TYPE_INFILL: - llama_sampler_chain_add(result->chain, llama_sampler_init_infill(vocab)); + llama_sampler_chain_add(result->chain, llama_sampler_init_infill (vocab)); break; case COMMON_SAMPLER_TYPE_PENALTIES: - llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); + llama_sampler_chain_add(result->chain, llama_sampler_init_penalties (params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); break; default: GGML_ASSERT(false && "unknown sampler type"); From c05e9e078666f210fc80fb20c0a77df7f6bb9e85 Mon Sep 17 00:00:00 2001 From: CasualAutopsy Date: Sat, 15 Feb 2025 18:29:05 -0500 Subject: [PATCH 5/8] revert: my crappy eye sight... --- common/sampling.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 325fae5379229..47c70208e3be4 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -222,13 +222,13 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep)); break; case COMMON_SAMPLER_TYPE_TEMPERATURE: - llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent)); + llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent)); break; case COMMON_SAMPLER_TYPE_INFILL: - llama_sampler_chain_add(result->chain, llama_sampler_init_infill (vocab)); + llama_sampler_chain_add(result->chain, llama_sampler_init_infill (vocab)); break; case COMMON_SAMPLER_TYPE_PENALTIES: - llama_sampler_chain_add(result->chain, llama_sampler_init_penalties (params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); + llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); break; default: GGML_ASSERT(false && "unknown sampler type"); From a9e7af0b73cfa291548d64d7920b5fd86e88273b Mon Sep 17 00:00:00 2001 From: CasualAutopsy Date: Sat, 15 Feb 2025 19:34:06 -0500 Subject: [PATCH 6/8] =?UTF-8?q?sampling:=20add=20XTC=20to=20Top-n=CF=83=20?= =?UTF-8?q?sampler=20chain?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- common/sampling.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 47c70208e3be4..7fec8524ef45e 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -192,6 +192,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k)); llama_sampler_chain_add(result->chain, llama_sampler_init_temp (params.temp)); llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma (params.top_n_sigma)); + llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); } else { for (const auto & cnstr : params.samplers) { switch (cnstr) { @@ -203,7 +204,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co c_breakers.push_back(str.c_str()); } - llama_sampler_chain_add(result->chain, llama_sampler_init_dry(vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); + llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); } break; case COMMON_SAMPLER_TYPE_TOP_K: From cc1a170360e44be59577c76b9f5f8d1d2be46362 Mon Sep 17 00:00:00 2001 From: CasualAutopsy Date: Sat, 15 Feb 2025 19:37:39 -0500 Subject: [PATCH 7/8] =?UTF-8?q?sampling:=20add=20Dyna.=20Temp.=20to=20Top-?= =?UTF-8?q?n=CF=83=20sampler=20chain?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- common/sampling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 7fec8524ef45e..74894090d5649 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -190,7 +190,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co if (params.mirostat == 0) { if (params.top_n_sigma >= 0) { llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k)); - llama_sampler_chain_add(result->chain, llama_sampler_init_temp (params.temp)); + llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent)); llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma (params.top_n_sigma)); llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); } else { From a558d3a3636ca615e803ab18c7e8ce47809023d4 Mon Sep 17 00:00:00 2001 From: CasualAutopsy Date: Sun, 16 Feb 2025 13:26:16 -0500 Subject: [PATCH 8/8] =?UTF-8?q?sampling:=20actually=20remove=20Top-n=CF=83?= =?UTF-8?q?=20from=20sampler(oops)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- common/common.h | 2 -- common/sampling.cpp | 5 ----- 2 files changed, 7 deletions(-) diff --git a/common/common.h b/common/common.h index 33a92af09ffe5..98b9a4464787a 100644 --- a/common/common.h +++ b/common/common.h @@ -96,7 +96,6 @@ enum common_sampler_type { COMMON_SAMPLER_TYPE_XTC = 8, COMMON_SAMPLER_TYPE_INFILL = 9, COMMON_SAMPLER_TYPE_PENALTIES = 10, - COMMON_SAMPLER_TYPE_N_SIGMA = 11, }; // dimensionality reduction methods, used by cvector-generator @@ -158,7 +157,6 @@ struct common_params_sampling { COMMON_SAMPLER_TYPE_TYPICAL_P, COMMON_SAMPLER_TYPE_TOP_P, COMMON_SAMPLER_TYPE_MIN_P, - COMMON_SAMPLER_TYPE_N_SIGMA, COMMON_SAMPLER_TYPE_XTC, COMMON_SAMPLER_TYPE_TEMPERATURE, }; diff --git a/common/sampling.cpp b/common/sampling.cpp index 74894090d5649..6919bb641629a 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -436,7 +436,6 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) { case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y'; case COMMON_SAMPLER_TYPE_TOP_P: return 'p'; case COMMON_SAMPLER_TYPE_MIN_P: return 'm'; - case COMMON_SAMPLER_TYPE_N_SIGMA: return 'n'; case COMMON_SAMPLER_TYPE_TEMPERATURE: return 't'; case COMMON_SAMPLER_TYPE_XTC: return 'x'; case COMMON_SAMPLER_TYPE_INFILL: return 'i'; @@ -452,7 +451,6 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) { case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p"; case COMMON_SAMPLER_TYPE_TOP_P: return "top_p"; case COMMON_SAMPLER_TYPE_MIN_P: return "min_p"; - case COMMON_SAMPLER_TYPE_N_SIGMA: return "top_n_sigma"; case COMMON_SAMPLER_TYPE_TEMPERATURE: return "temperature"; case COMMON_SAMPLER_TYPE_XTC: return "xtc"; case COMMON_SAMPLER_TYPE_INFILL: return "infill"; @@ -468,7 +466,6 @@ std::vector common_sampler_types_from_names(const std::vect { "top_p", COMMON_SAMPLER_TYPE_TOP_P }, { "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P }, { "min_p", COMMON_SAMPLER_TYPE_MIN_P }, - { "top_n_sigma", COMMON_SAMPLER_TYPE_N_SIGMA }, { "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE }, { "xtc", COMMON_SAMPLER_TYPE_XTC }, { "infill", COMMON_SAMPLER_TYPE_INFILL }, @@ -486,7 +483,6 @@ std::vector common_sampler_types_from_names(const std::vect { "typ-p", COMMON_SAMPLER_TYPE_TYPICAL_P }, { "typ", COMMON_SAMPLER_TYPE_TYPICAL_P }, { "min-p", COMMON_SAMPLER_TYPE_MIN_P }, - { "nsigma", COMMON_SAMPLER_TYPE_N_SIGMA }, { "temp", COMMON_SAMPLER_TYPE_TEMPERATURE }, }; @@ -517,7 +513,6 @@ std::vector common_sampler_types_from_chars(const std::stri { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P), COMMON_SAMPLER_TYPE_TOP_P }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P }, - { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_N_SIGMA), COMMON_SAMPLER_TYPE_N_SIGMA }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL), COMMON_SAMPLER_TYPE_INFILL },