Skip to content

Commit f24989b

Browse files
committed
llguidance : use attrs to determine special tokens
Previously, only tokens whose detokenized size was zero were treated as special.
1 parent 3c3635d commit f24989b

File tree

1 file changed

+13
-10
lines changed

1 file changed

+13
-10
lines changed

common/llguidance.cpp

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
#include "llama.h"
12
#include "sampling.h"
23
#include "log.h"
34

@@ -160,19 +161,21 @@ static LlgTokenizer * llama_sampler_llg_new_tokenizer(const llama_vocab * vocab)
160161

161162
llama_token token = i;
162163
auto dp = (char *) token_bytes + offset;
163-
auto size = llama_detokenize(vocab, &token, 1, dp, max_token, false, false);
164+
165+
const auto attrs = llama_vocab_get_attr(vocab, token);
166+
bool is_special = (attrs & LLAMA_TOKEN_ATTR_CONTROL) || (attrs & LLAMA_TOKEN_ATTR_USER_DEFINED);
167+
168+
if (is_special) {
169+
*dp = '\xff'; // special token prefix marker
170+
dp += 1;
171+
}
172+
173+
auto size = llama_detokenize(vocab, &token, 1, dp, max_token, false, true);
164174
if (size < 0) {
165175
GGML_ABORT("llama_detokenize failed\n");
166176
}
167-
if (size == 0) {
168-
size = llama_detokenize(vocab, &token, 1, dp + 1, max_token - 1, false, true);
169-
if (size < 0) {
170-
GGML_ABORT("llama_detokenize failed\n");
171-
}
172-
if (size != 0) {
173-
*dp = '\xff'; // special token prefix marker
174-
size += 1;
175-
}
177+
if (is_special) {
178+
size += 1;
176179
}
177180

178181
token_lens[i] = size;

0 commit comments

Comments
 (0)