@@ -4335,7 +4335,7 @@ struct llama_beam {
43354335 float p; // Cumulative beam probability (renormalized relative to all beams)
43364336 bool eos; // Initialize end-of-sentence to false. Callback sets this to true.
43374337 // Sort beams by probability. In case of ties, prefer beams at eos.
4338- bool operator <(llama_beam const & rhs) const {
4338+ bool operator <(llama_beam const & rhs) const {
43394339 return std::make_tuple (p, eos) < std::make_tuple (rhs.p , rhs.eos );
43404340 }
43414341 // Shift off first n tokens and discard them.
@@ -4350,15 +4350,15 @@ struct llama_beam {
43504350
43514351// A struct for calculating logit-related info.
43524352struct logit_info {
4353- float const * const logits;
4353+ float const * const logits;
43544354 int const n_vocab;
43554355 float const max_l;
43564356 float const normalizer;
43574357 struct sum_exp {
43584358 float max_l;
43594359 float operator ()(float sum, float l) const { return sum + std::exp (l - max_l); }
43604360 };
4361- logit_info (llama_context* ctx)
4361+ logit_info (llama_context * ctx)
43624362 : logits(llama_get_logits(ctx))
43634363 , n_vocab(llama_n_vocab(ctx))
43644364 , max_l(*std::max_element (logits, logits + n_vocab))
@@ -4376,7 +4376,7 @@ struct logit_info {
43764376 for (llama_token token_id=0 ; token_id<k_min ; ++token_id) {
43774377 min_heap.push_back (get_token_data (token_id));
43784378 }
4379- auto comp = [](llama_token_data const & a, llama_token_data const & b) { return a.logit > b.logit ; };
4379+ auto comp = [](llama_token_data const & a, llama_token_data const & b) { return a.logit > b.logit ; };
43804380 std::make_heap (min_heap.begin (), min_heap.end (), comp);
43814381 for (llama_token token_id=k_min ; token_id<n_vocab ; ++token_id) {
43824382 if (min_heap.front ().logit < logits[token_id]) {
@@ -4432,9 +4432,9 @@ struct beam_search {
44324432 // * Gather elements until the vector is full, then call std::make_heap() on it.
44334433 // * If the heap is full and a new element is found that should be included, pop the
44344434 // least element to the back(), replace it with the new, then push it into the heap.
4435- void fill_next_beams_by_top_probabilities (llama_beam& beam) {
4435+ void fill_next_beams_by_top_probabilities (llama_beam & beam) {
44364436 // Min-heaps use a greater-than comparator.
4437- auto const comp = [](llama_beam const & a, llama_beam const & b) { return a.p > b.p ; };
4437+ auto const comp = [](llama_beam const & a, llama_beam const & b) { return a.p > b.p ; };
44384438 if (beam.eos ) {
44394439 // beam is at end-of-sentence, so just copy it to next_beams if its probability is high enough.
44404440 if (next_beams.size () < n_beams) {
@@ -4516,9 +4516,9 @@ struct beam_search {
45164516 // * any of the beams have not yet reached end-of-sentence, AND
45174517 // * the highest probability beam(s) (plural in case of ties) are not at end-of-sentence
45184518 // (since all other beam probabilities can only decrease)
4519- void loop (llama_beam_search_callback_fn_t const callback, void * const callback_data) {
4519+ void loop (llama_beam_search_callback_fn_t const callback, void * const callback_data) {
45204520 beams.push_back ({{}, 1 .0f , false }); // Start with one empty beam w/ probability = 1.0 and !eos.
4521- auto const not_eos = [](llama_beam const & beam) { return !beam.eos ; };
4521+ auto const not_eos = [](llama_beam const & beam) { return !beam.eos ; };
45224522 for (int i=0 ; i<n_predict && std::any_of (beams.begin (),beams.end (),not_eos) &&
45234523 !beams[top_beam_index ()].eos ; ++i) {
45244524 callback (callback_data, get_beams_state (false )); // Sets common_prefix_length
@@ -4528,8 +4528,8 @@ struct beam_search {
45284528 n_past += common_prefix_length;
45294529 }
45304530 // Zero-out next_beam probabilities to place them last in following min-heap.
4531- std::for_each (next_beams.begin (), next_beams.end (), [](llama_beam& beam) { beam.p = 0 .0f ; });
4532- for (llama_beam& beam : beams) {
4531+ std::for_each (next_beams.begin (), next_beams.end (), [](llama_beam & beam) { beam.p = 0 .0f ; });
4532+ for (llama_beam & beam : beams) {
45334533 beam.shift_tokens (common_prefix_length);
45344534 fill_next_beams_by_top_probabilities (beam);
45354535 }
@@ -4543,10 +4543,10 @@ struct beam_search {
45434543
45444544 // As beams grow, the cumulative probabilities decrease.
45454545 // Renormalize them to avoid floating point underflow.
4546- static void renormalize_beam_probabilities (std::vector<llama_beam>& beams) {
4547- auto const sum_p = [](float sum, llama_beam& beam) { return sum + beam.p ; };
4546+ static void renormalize_beam_probabilities (std::vector<llama_beam> & beams) {
4547+ auto const sum_p = [](float sum, llama_beam & beam) { return sum + beam.p ; };
45484548 float const inv_sum = 1 .0f / std::accumulate (beams.begin (), beams.end (), 0 .0f , sum_p);
4549- std::for_each (beams.begin (), beams.end (), [=](llama_beam& beam) { beam.p *= inv_sum; });
4549+ std::for_each (beams.begin (), beams.end (), [=](llama_beam & beam) { beam.p *= inv_sum; });
45504550 }
45514551
45524552 // Assumes beams is non-empty. Uses llama_beam::operator<() for ordering.
@@ -4564,7 +4564,7 @@ struct beam_search {
45644564};
45654565
45664566void llama_beam_search (llama_context * ctx,
4567- llama_beam_search_callback_fn_t callback, void * callback_data,
4567+ llama_beam_search_callback_fn_t callback, void * callback_data,
45684568 size_t n_beams, int n_past, int n_predict, int n_threads) {
45694569 assert (ctx);
45704570 const int64_t t_start_sample_us = ggml_time_us ();
0 commit comments