@@ -36,6 +36,8 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
36
36
{ " Hello" , { 258 , 23090 , }, },
37
37
{ " Hello" , { 466 , 23090 , }, },
38
38
{ " Hello\n Hello" , { 466 , 23090 , 742 , 23090 , }, },
39
+ { " \n =" , { 1212 , 40 , }, },
40
+ { " ' era" , { 18 , 4932 , }, },
39
41
};
40
42
41
43
return _k_tests;
@@ -155,7 +157,7 @@ int main(int argc, char **argv) {
155
157
156
158
fprintf (stderr, " %s : text size: %zu\n " , __func__, text.size ());
157
159
158
- const std::vector<llama_token> res = llama_tokenize (ctx, text, true );
160
+ const std::vector<llama_token> res = llama_tokenize (ctx, text, false );
159
161
160
162
fprintf (stderr, " %s : tokens: %zu\n " , __func__, res.size ());
161
163
@@ -169,10 +171,8 @@ int main(int argc, char **argv) {
169
171
}
170
172
171
173
for (const auto & tok : res) {
172
- ofs << tok << " " ;
174
+ ofs << tok << " ' " << llama_detokenize_bpe (ctx, std::vector< int >{tok}) << " ' " << std::endl ;
173
175
}
174
-
175
- ofs << " \n " ;
176
176
}
177
177
178
178
fprintf (stderr, " %s : tokens written to '%s'\n " , __func__, (fname_text + " .tokcpp" ).c_str ());
0 commit comments