
Commit dd1af2e

server : minor style
1 parent a4d69d8 commit dd1af2e

File tree

1 file changed: 98 additions, 47 deletions


examples/server/server.cpp

Lines changed: 98 additions & 47 deletions
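The whole change is stylistic: opening braces move onto their own line, control-flow keywords such as if and while get a space before the parenthesis, and blank lines are added between logical blocks. A minimal before/after sketch of the convention being applied (the function and variable names here are illustrative, not taken from server.cpp):

    // before: brace on the same line, no space after the keyword
    void example() {
        if(ready) {
            run();
        } else {
            wait();
        }
    }

    // after: brace on its own line, space after the keyword
    void example()
    {
        if (ready)
        {
            run();
        }
        else
        {
            wait();
        }
    }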
@@ -391,18 +391,19 @@ struct llama_client_slot
     double t_token_generation; // ms

     void reset() {
-        num_prompt_tokens = 0;
-        generated_text = "";
-        truncated = false;
-        stopped_eos = false;
-        stopped_word = false;
-        stopped_limit = false;
-        stopping_word = "";
-        multibyte_pending = 0;
-        n_past = 0;
-        sent_count = 0;
+        num_prompt_tokens      = 0;
+        generated_text         = "";
+        truncated              = false;
+        stopped_eos            = false;
+        stopped_word           = false;
+        stopped_limit          = false;
+        stopping_word          = "";
+        multibyte_pending      = 0;
+        n_past                 = 0;
+        sent_count             = 0;
         sent_token_probs_index = 0;
-        infill = false;
+        infill                 = false;
+
         generated_token_probs.clear();

         for (slot_image &img : images)
@@ -882,7 +883,8 @@ struct llama_server_context

         // wait until system prompt load
         system_need_update = true;
-        while (system_need_update) {
+        while (system_need_update)
+        {
             std::this_thread::sleep_for(std::chrono::milliseconds(5));
         }
         // system prompt loaded, continue
@@ -997,26 +999,31 @@ struct llama_server_context
             const std::string str_test = slot.generated_text.substr(pos);
             bool is_stop_full = false;
             size_t stop_pos = find_stopping_strings(str_test, token_str.size(), STOP_FULL, slot);
-            if (stop_pos != std::string::npos) {
+            if (stop_pos != std::string::npos)
+            {
                 is_stop_full = true;
                 slot.generated_text.erase(
                     slot.generated_text.begin() + pos + stop_pos,
                     slot.generated_text.end());
                 pos = std::min(slot.sent_count, slot.generated_text.size());
-            } else {
+            }
+            else
+            {
                 is_stop_full = false;
                 stop_pos = find_stopping_strings(str_test, token_str.size(), STOP_PARTIAL, slot);
             }

             // check if there is any token to predict
-            if(stop_pos == std::string::npos || (!slot.has_next_token && !is_stop_full && stop_pos > 0)) {
+            if (stop_pos == std::string::npos || (!slot.has_next_token && !is_stop_full && stop_pos > 0))
+            {
                 // no send the stop word in the response
                 result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
                 slot.sent_count += result.text_to_send.size();
                 // add the token to slot queue and cache
             }
             slot.add_token_string(result);
-            if (slot.params.stream) {
+            if (slot.params.stream)
+            {
                 send_partial_response(slot, result);
             }
         }
@@ -1051,6 +1058,7 @@ struct llama_server_context
                                   {"stopped_limit", slot.stopped_limit},
                                   {"stopping_word", slot.stopping_word},
                               });
+
         return slot.has_next_token; // continue
     }


@@ -1089,7 +1097,8 @@ struct llama_server_context
         return slot.images.size() > 0;
     }

-    void send_error(int id, std::string error) {
+    void send_error(int id, std::string error)
+    {
         std::lock_guard<std::mutex> lock(mutex_results);
         task_result res;
         res.id = id;
@@ -1098,11 +1107,13 @@ struct llama_server_context
         queue_results.push_back(res);
     }

-    json get_model_props() {
+    json get_model_props()
+    {
         return get_formated_generation(slots[0]);
     }

-    json get_formated_generation(llama_client_slot &slot) {
+    json get_formated_generation(llama_client_slot &slot)
+    {
         const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(ctx));
         const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
                                 eos_bias->second < 0.0f && std::isinf(eos_bias->second);
@@ -1134,19 +1145,22 @@ struct llama_server_context
         };
     }

-    void send_partial_response(llama_client_slot & slot, completion_token_output tkn) {
+    void send_partial_response(llama_client_slot &slot, completion_token_output tkn)
+    {
         std::lock_guard<std::mutex> lock(mutex_results);
         task_result res;
         res.id = slot.task_id;
         res.error = false;
         res.stop = false;
+
         res.result_json = json
         {
             {"content", tkn.text_to_send},
             {"stop", false},
             {"slot_id", slot.id},
             {"multimodal", multimodal}
         };
+
         if (slot.sparams.n_probs > 0)
         {
             std::vector<completion_token_output> probs_output = {};
@@ -1160,15 +1174,18 @@ struct llama_server_context
             slot.sent_token_probs_index = probs_stop_pos;
             res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs_output);
         }
+
         queue_results.push_back(res);
     }

-    void send_final_response(llama_client_slot & slot) {
+    void send_final_response(llama_client_slot &slot)
+    {
         std::lock_guard<std::mutex> lock(mutex_results);
         task_result res;
         res.id = slot.task_id;
         res.error = false;
         res.stop = true;
+
         res.result_json = json
         {
             {"content", !slot.params.stream ? slot.generated_text : ""},
{"content", !slot.params.stream ? slot.generated_text : ""},
@@ -1191,20 +1208,25 @@ struct llama_server_context
         if (slot.sparams.n_probs > 0)
         {
             std::vector<completion_token_output> probs = {};
-            if(!slot.params.stream && slot.stopped_word) {
+            if (!slot.params.stream && slot.stopped_word)
+            {
                 const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
                 probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
-            } else {
+            }
+            else
+            {
                 probs = std::vector<completion_token_output>(
                     slot.generated_token_probs.begin(),
                     slot.generated_token_probs.begin() + slot.sent_token_probs_index);
             }
             res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs);
         }
+
         queue_results.push_back(res);
     }

-    void send_embedding(llama_client_slot & slot) {
+    void send_embedding(llama_client_slot &slot)
+    {
         std::lock_guard<std::mutex> lock(mutex_results);
         task_result res;
         res.id = slot.task_id;
@@ -1234,7 +1256,8 @@ struct llama_server_context
         queue_results.push_back(res);
     }

-    int request_completion(json data, bool infill) {
+    int request_completion(json data, bool infill)
+    {
         std::lock_guard<std::mutex> lock(mutex_tasks);
         task_server task;
         task.id = id_gen++;
@@ -1245,17 +1268,22 @@ struct llama_server_context
         return task.id;
     }

-    task_result next_result(int task_id) {
-        while (true) {
+    task_result next_result(int task_id)
+    {
+        while (true)
+        {
             std::this_thread::sleep_for(std::chrono::microseconds(5));
             std::lock_guard<std::mutex> lock(mutex_results);

-            if (queue_results.empty()) {
+            if (queue_results.empty())
+            {
                 continue;
             }

-            for (int i = 0; i < (int) queue_results.size(); i++) {
-                if (queue_results[i].id == task_id) {
+            for (int i = 0; i < (int) queue_results.size(); i++)
+            {
+                if (queue_results[i].id == task_id)
+                {
                     task_result res = queue_results[i];
                     queue_results.erase(queue_results.begin() + i);
                     return res;
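
As the hunk above shows, next_result() busy-polls the shared results queue: sleep a few microseconds, take mutex_results, scan queue_results for an entry whose id matches the requested task, then erase and return it. A stripped-down sketch of that pattern, with simplified stand-in types and names rather than the server's actual declarations:

    #include <chrono>
    #include <mutex>
    #include <thread>
    #include <vector>

    struct task_result_t { int id; /* payload omitted */ };  // stand-in for task_result

    std::mutex                 results_mutex;   // guards the shared queue
    std::vector<task_result_t> results_queue;   // filled by the worker loop

    // Poll until a result with the matching task id appears, then remove and return it.
    task_result_t wait_for_result(int task_id)
    {
        while (true)
        {
            std::this_thread::sleep_for(std::chrono::microseconds(5));
            std::lock_guard<std::mutex> lock(results_mutex);

            for (size_t i = 0; i < results_queue.size(); i++)
            {
                if (results_queue[i].id == task_id)
                {
                    task_result_t res = results_queue[i];
                    results_queue.erase(results_queue.begin() + i);
                    return res;
                }
            }
            // nothing for this task yet; the lock is released at end of scope and we loop again
        }
    }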
@@ -1335,7 +1363,8 @@ struct llama_server_context
         return true;
     }

-    void request_cancel(int task_id) {
+    void request_cancel(int task_id)
+    {
         std::lock_guard<std::mutex> lock(mutex_tasks);
         task_server task;
         task.id = id_gen++;
@@ -1344,9 +1373,11 @@ struct llama_server_context
         queue_tasks.push_back(task);
     }

-    void process_tasks() {
+    void process_tasks()
+    {
         std::lock_guard<std::mutex> lock(mutex_tasks);
-        while (!queue_tasks.empty()) {
+        while (!queue_tasks.empty())
+        {
             task_server task = queue_tasks.front();
             queue_tasks.erase(queue_tasks.begin());
             switch (task.type)
@@ -1379,8 +1410,10 @@ struct llama_server_context
                     }
                 } break;
                 case CANCEL_TASK: { // release slot linked with the task id
-                    for (auto & slot : slots) {
-                        if (slot.task_id == task.target_id) {
+                    for (auto & slot : slots)
+                    {
+                        if (slot.task_id == task.target_id)
+                        {
                             slot.release();
                             break;
                         }
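
For context, the loop being restyled here drains a mutex-guarded task queue and dispatches on the task type; a CANCEL_TASK walks the slots and releases the one bound to the cancelled task id. A reduced sketch of that dispatch shape, using placeholder types rather than the server's real task_server and llama_client_slot:

    #include <mutex>
    #include <vector>

    enum task_type { COMPLETION_TASK, CANCEL_TASK };           // placeholder enum

    struct task_t { int id; int target_id; task_type type; };  // placeholder task record
    struct slot_t { int task_id = -1; void release() { task_id = -1; } };

    std::mutex          tasks_mutex;
    std::vector<task_t> tasks_queue;
    std::vector<slot_t> slots;

    // Pop tasks off the queue under the lock and dispatch on their type.
    void process_tasks()
    {
        std::lock_guard<std::mutex> lock(tasks_mutex);
        while (!tasks_queue.empty())
        {
            task_t task = tasks_queue.front();
            tasks_queue.erase(tasks_queue.begin());
            switch (task.type)
            {
                case COMPLETION_TASK:
                    // assign the request to a free slot (omitted in this sketch)
                    break;
                case CANCEL_TASK:
                    // release the slot linked with the cancelled task id
                    for (auto & slot : slots)
                    {
                        if (slot.task_id == task.target_id)
                        {
                            slot.release();
                            break;
                        }
                    }
                    break;
            }
        }
    }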
@@ -2006,7 +2039,8 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
         else if (arg == "--embedding")
         {
             params.embedding = true;
-        } else if (arg == "-cb" || arg == "--cont-batching")
+        }
+        else if (arg == "-cb" || arg == "--cont-batching")
         {
             params.cont_batching = true;
         }
@@ -2047,7 +2081,8 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
             );
             llama.process_system_prompt_data(json::parse(systm_content));
         }
-        else if(arg == "--mmproj") {
+        else if(arg == "--mmproj")
+        {
             if (++i >= argc)
             {
                 invalid_param = true;
@@ -2163,6 +2198,7 @@ int main(int argc, char **argv)

     LOG_INFO("build info", {{"build", BUILD_NUMBER},
                             {"commit", BUILD_COMMIT}});
+
     LOG_INFO("system info", {
                                 {"n_threads", params.n_threads},
                                 {"n_threads_batch", params.n_threads_batch},
@@ -2239,10 +2275,12 @@ int main(int argc, char **argv)
                 return;
             }
         } else {
-            const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink & sink) {
-                while(true) {
+            const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink & sink)
+            {
+                while (true)
+                {
                     task_result result = llama.next_result(task_id);
-                    if(!result.error) {
+                    if (!result.error) {
                         const std::string str =
                             "data: " +
                             result.result_json.dump(-1, ' ', false, json::error_handler_t::replace) +
@@ -2264,10 +2302,13 @@ int main(int argc, char **argv)
                 sink.done();
                 return true;
             };
-            auto on_complete = [task_id, &llama] (bool) {
+
+            auto on_complete = [task_id, &llama] (bool)
+            {
                 // cancel
                 llama.request_cancel(task_id);
             };
+
             res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
         }
     });
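
The chunked content provider above streams partial results as server-sent events: each task_result is serialized to JSON and written to the sink prefixed with "data: ", and on_complete cancels the task when the stream ends. A small sketch of just the event framing, assuming the usual SSE convention of a blank line terminating each event (the helper name below is illustrative; the real handler also checks result.stop and the error flag as shown in the diff):

    #include <string>

    #include "json.hpp"  // nlohmann::json, as used by server.cpp

    // Build one server-sent-event frame from a result payload.
    // The trailing blank line lets the client split the stream into events.
    static std::string sse_frame(const nlohmann::json & result_json)
    {
        return "data: " +
               result_json.dump(-1, ' ', false, nlohmann::json::error_handler_t::replace) +
               "\n\n";
    }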
@@ -2279,7 +2320,8 @@ int main(int argc, char **argv)
         if (!json_value(data, "stream", false)) {
             std::string completion_text;
             task_result result = llama.next_result(task_id);
-            if(!result.error && result.stop) {
+            if (!result.error && result.stop)
+            {
                 res.set_content(result.result_json.dump(-1, ' ', false, json::error_handler_t::replace), "application/json");
             }
             else
@@ -2290,9 +2332,10 @@ int main(int argc, char **argv)
             }
         } else {
             const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink & sink) {
-                while(true) {
+                while (true)
+                {
                     task_result result = llama.next_result(task_id);
-                    if(!result.error) {
+                    if (!result.error) {
                         const std::string str =
                             "data: " +
                             result.result_json.dump(-1, ' ', false, json::error_handler_t::replace) +
@@ -2304,20 +2347,28 @@ int main(int argc, char **argv)
                         {
                             return false;
                         }
-                        if(result.stop) {
+                        if (result.stop)
+                        {
                             break;
                         }
-                    } else {
+                    }
+                    else
+                    {
                         break;
                     }
                 }
+
                 sink.done();
+
                 return true;
             };
-            auto on_complete = [task_id, &llama] (bool) {
+
+            auto on_complete = [task_id, &llama] (bool)
+            {
                 // cancel
                 llama.request_cancel(task_id);
             };
+
             res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
         }
     });
