This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 58ec4f9

fix: correct model_id in chat (#1379)
* fix: correct model_id for chat
* fix: fallback to cortex.llamacpp if does not have engine field
1 parent d30d6b0 commit 58ec4f9
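The cortex.llamacpp fallback named in the second bullet is implemented in commit files that are not among the diffs excerpted below (most likely the YAML config loader or the new model service layer). As a rough sketch of the intent, under stated assumptions: a model.yml with no engine field is treated as a cortex.llamacpp model instead of propagating an empty engine string. The helper name below is hypothetical, not the actual patch.

// Illustrative sketch only -- ResolveEngine is a made-up helper; the real
// fallback lives in files outside this excerpt. "cortex.llamacpp" comes
// straight from the commit message.
#include <string>

std::string ResolveEngine(const std::string& engine_from_yaml) {
  constexpr const char* kDefaultEngine = "cortex.llamacpp";
  return engine_from_yaml.empty() ? std::string(kDefaultEngine)
                                  : engine_from_yaml;
}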

14 files changed: 281 additions, 175 deletions

engine/commands/chat_completion_cmd.cc

Lines changed: 8 additions & 6 deletions
@@ -4,10 +4,11 @@
 #include "cortex_upd_cmd.h"
 #include "database/models.h"
 #include "model_status_cmd.h"
+#include "run_cmd.h"
 #include "server_start_cmd.h"
 #include "trantor/utils/Logger.h"
 #include "utils/logging_utils.h"
-#include "run_cmd.h"
+#include "config/yaml_config.h"
 
 namespace commands {
 namespace {
@@ -39,7 +40,7 @@ struct ChunkParser {
 };
 
 void ChatCompletionCmd::Exec(const std::string& host, int port,
-                             const std::string& model_handle, std::string msg) {
+                             const std::string& model_handle, std::string msg) {
   cortex::db::Models modellist_handler;
   config::YamlHandler yaml_handler;
   try {
@@ -50,15 +51,16 @@ void ChatCompletionCmd::Exec(const std::string& host, int port,
     }
     yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml);
     auto mc = yaml_handler.GetModelConfig();
-    Exec(host, port, mc, std::move(msg));
+    Exec(host, port, model_handle, mc, std::move(msg));
   } catch (const std::exception& e) {
     CLI_LOG("Fail to start model information with ID '" + model_handle +
             "': " + e.what());
   }
 }
 
 void ChatCompletionCmd::Exec(const std::string& host, int port,
-                             const config::ModelConfig& mc, std::string msg) {
+                             const std::string& model_handle,
+                             const config::ModelConfig& mc, std::string msg) {
   auto address = host + ":" + std::to_string(port);
   // Check if server is started
   {
@@ -71,7 +73,7 @@ void ChatCompletionCmd::Exec(const std::string& host, int port,
 
   // Only check if llamacpp engine
   if ((mc.engine.find("llamacpp") != std::string::npos) &&
-      !commands::ModelStatusCmd().IsLoaded(host, port, mc)) {
+      !commands::ModelStatusCmd().IsLoaded(host, port, model_handle)) {
     CLI_LOG("Model is not loaded yet!");
     return;
   }
@@ -104,7 +106,7 @@ void ChatCompletionCmd::Exec(const std::string& host, int port,
   histories_.push_back(std::move(new_data));
   json_data["engine"] = mc.engine;
   json_data["messages"] = histories_;
-  json_data["model"] = mc.name;
+  json_data["model"] = model_handle;
   //TODO: support non-stream
   json_data["stream"] = true;
   json_data["stop"] = mc.stop;
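Net effect of this file's changes: the chat path now carries the user's model handle end to end and sends it as the "model" field, rather than the name from the model's YAML, so the status check and the completion request refer to the same ID the model was loaded under. The following is an illustration only, not part of the commit, showing the payload the command now assembles; the handle "tinyllama:1b" and the message are made up for the example.

// Illustration: chat request body after the fix. Before it, "model" carried
// mc.name from model.yml, which may differ from the model handle.
#include <iostream>
#include "nlohmann/json.hpp"

int main() {
  nlohmann::json json_data;
  json_data["engine"] = "cortex.llamacpp";  // mc.engine
  json_data["messages"] =
      nlohmann::json::array({{{"role", "user"}, {"content", "Hello"}}});
  json_data["model"] = "tinyllama:1b";      // model_handle (was mc.name)
  json_data["stream"] = true;
  std::cout << json_data.dump(2) << std::endl;
  return 0;
}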

engine/commands/chat_completion_cmd.h

Lines changed: 2 additions & 2 deletions
@@ -9,8 +9,8 @@ class ChatCompletionCmd {
  public:
   void Exec(const std::string& host, int port, const std::string& model_handle,
             std::string msg);
-  void Exec(const std::string& host, int port, const config::ModelConfig& mc,
-            std::string msg);
+  void Exec(const std::string& host, int port, const std::string& model_handle,
+            const config::ModelConfig& mc, std::string msg);
 
  private:
   std::vector<nlohmann::json> histories_;

engine/commands/model_start_cmd.cc

Lines changed: 7 additions & 68 deletions
@@ -5,84 +5,23 @@
 #include "model_status_cmd.h"
 #include "nlohmann/json.hpp"
 #include "server_start_cmd.h"
+#include "services/model_service.h"
 #include "trantor/utils/Logger.h"
 #include "utils/file_manager_utils.h"
 #include "utils/logging_utils.h"
 
 namespace commands {
 bool ModelStartCmd::Exec(const std::string& host, int port,
                          const std::string& model_handle) {
+  ModelService ms;
+  auto res = ms.StartModel(host, port, model_handle);
 
-  cortex::db::Models modellist_handler;
-  config::YamlHandler yaml_handler;
-  try {
-    auto model_entry = modellist_handler.GetModelInfo(model_handle);
-    if (model_entry.has_error()) {
-      CLI_LOG("Error: " + model_entry.error());
-      return false;
-    }
-    yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml);
-    auto mc = yaml_handler.GetModelConfig();
-    return Exec(host, port, mc);
-  } catch (const std::exception& e) {
-    CLI_LOG("Fail to start model information with ID '" + model_handle +
-            "': " + e.what());
+  if (res.has_error()) {
+    CLI_LOG("Error: " + res.error());
     return false;
   }
-}
-
-bool ModelStartCmd::Exec(const std::string& host, int port,
-                         const config::ModelConfig& mc) {
-  // Check if server is started
-  if (!commands::IsServerAlive(host, port)) {
-    CLI_LOG("Server is not started yet, please run `"
-            << commands::GetCortexBinary() << " start` to start server!");
-    return false;
-  }
-
-  // Only check for llamacpp for now
-  if ((mc.engine.find("llamacpp") != std::string::npos) &&
-      commands::ModelStatusCmd().IsLoaded(host, port, mc)) {
-    CLI_LOG("Model has already been started!");
-    return true;
-  }
-
-  httplib::Client cli(host + ":" + std::to_string(port));
-
-  nlohmann::json json_data;
-  if (mc.files.size() > 0) {
-    // TODO(sang) support multiple files
-    json_data["model_path"] = mc.files[0];
-  } else {
-    LOG_WARN << "model_path is empty";
-    return false;
-  }
-  json_data["model"] = mc.name;
-  json_data["system_prompt"] = mc.system_template;
-  json_data["user_prompt"] = mc.user_template;
-  json_data["ai_prompt"] = mc.ai_template;
-  json_data["ctx_len"] = mc.ctx_len;
-  json_data["stop"] = mc.stop;
-  json_data["engine"] = mc.engine;
-
-  auto data_str = json_data.dump();
-  cli.set_read_timeout(std::chrono::seconds(60));
-  auto res = cli.Post("/inferences/server/loadmodel", httplib::Headers(),
-                      data_str.data(), data_str.size(), "application/json");
-  if (res) {
-    if (res->status == httplib::StatusCode::OK_200) {
-      CLI_LOG("Model loaded!");
-      return true;
-    } else {
-      CTL_ERR("Model failed to load with status code: " << res->status);
-      return false;
-    }
-  } else {
-    auto err = res.error();
-    CTL_ERR("HTTP error: " << httplib::to_string(err));
-    return false;
-  }
-  return false;
+  CLI_LOG("Model loaded!");
+  return true;
 }
 
 }; // namespace commands
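The command now delegates to a ModelService added by this commit in services/model_service.*, which is not among the excerpts shown here; ModelStatusCmd and ModelStopCmd below follow the same pattern. A minimal sketch of the call surface these commands rely on, with the result type stubbed out since the actual definition is not shown:

// Sketch under stated assumptions: ServiceResult stands in for whatever
// error-carrying result type the real ModelService returns; only the
// has_error()/error() surface used by the commands above is reproduced.
#include <string>

struct ServiceResult {
  bool ok = false;
  std::string err;
  bool has_error() const { return !ok; }
  const std::string& error() const { return err; }
};

class ModelService {
 public:
  // Each call takes the model handle, resolves the YAML config internally,
  // and talks to the inference server, which is why the CLI commands no
  // longer need cortex::db::Models or config::YamlHandler themselves.
  ServiceResult StartModel(const std::string& host, int port,
                           const std::string& model_handle);
  ServiceResult StopModel(const std::string& host, int port,
                          const std::string& model_handle);
  ServiceResult GetModelStatus(const std::string& host, int port,
                               const std::string& model_handle);
};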

engine/commands/model_start_cmd.h

Lines changed: 0 additions & 2 deletions
@@ -1,13 +1,11 @@
 #pragma once
 #include <string>
-#include "config/model_config.h"
 
 namespace commands {
 
 class ModelStartCmd {
  public:
  bool Exec(const std::string& host, int port, const std::string& model_handle);
 
-  bool Exec(const std::string& host, int port, const config::ModelConfig& mc);
 };
 } // namespace commands

engine/commands/model_status_cmd.cc

Lines changed: 6 additions & 37 deletions
@@ -4,49 +4,18 @@
 #include "httplib.h"
 #include "nlohmann/json.hpp"
 #include "utils/logging_utils.h"
+#include "services/model_service.h"
 
 namespace commands {
 bool ModelStatusCmd::IsLoaded(const std::string& host, int port,
                               const std::string& model_handle) {
-  cortex::db::Models modellist_handler;
-  config::YamlHandler yaml_handler;
-  try {
-    auto model_entry = modellist_handler.GetModelInfo(model_handle);
-    if (model_entry.has_error()) {
-      CLI_LOG("Error: " + model_entry.error());
-      return false;
-    }
-    yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml);
-    auto mc = yaml_handler.GetModelConfig();
-    return IsLoaded(host, port, mc);
-  } catch (const std::exception& e) {
-    CLI_LOG("Fail to get model status with ID '" + model_handle +
-            "': " + e.what());
-    return false;
-  }
-}
+  ModelService ms;
+  auto res = ms.GetModelStatus(host, port, model_handle);
 
-bool ModelStatusCmd::IsLoaded(const std::string& host, int port,
-                              const config::ModelConfig& mc) {
-  httplib::Client cli(host + ":" + std::to_string(port));
-  nlohmann::json json_data;
-  json_data["model"] = mc.name;
-  json_data["engine"] = mc.engine;
-
-  auto data_str = json_data.dump();
-
-  auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
-                      data_str.data(), data_str.size(), "application/json");
-  if (res) {
-    if (res->status == httplib::StatusCode::OK_200) {
-      return true;
-    }
-  } else {
-    auto err = res.error();
-    CTL_WRN("HTTP error: " << httplib::to_string(err));
+  if (res.has_error()) {
+    // CLI_LOG("Error: " + res.error());
     return false;
   }
-
-  return false;
+  return true;
 }
 } // namespace commands

engine/commands/model_status_cmd.h

Lines changed: 0 additions & 3 deletions
@@ -1,14 +1,11 @@
 #pragma once
 #include <string>
-#include "config/yaml_config.h"
 
 namespace commands {
 
 class ModelStatusCmd {
  public:
   bool IsLoaded(const std::string& host, int port,
                 const std::string& model_handle);
-  bool IsLoaded(const std::string& host, int port,
-                const config::ModelConfig& mc);
 };
 } // namespace commands

engine/commands/model_stop_cmd.cc

Lines changed: 7 additions & 32 deletions
@@ -5,45 +5,20 @@
 #include "nlohmann/json.hpp"
 #include "utils/file_manager_utils.h"
 #include "utils/logging_utils.h"
+#include "services/model_service.h"
 
 namespace commands {
 
 void ModelStopCmd::Exec(const std::string& host, int port,
                         const std::string& model_handle) {
-  cortex::db::Models modellist_handler;
-  config::YamlHandler yaml_handler;
-  try {
-    auto model_entry = modellist_handler.GetModelInfo(model_handle);
-    if (model_entry.has_error()) {
-      CLI_LOG("Error: " + model_entry.error());
-      return;
-    }
-    yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml);
-    auto mc = yaml_handler.GetModelConfig();
-    httplib::Client cli(host + ":" + std::to_string(port));
-    nlohmann::json json_data;
-    json_data["model"] = mc.name;
-    json_data["engine"] = mc.engine;
+  ModelService ms;
+  auto res = ms.StopModel(host, port, model_handle);
 
-    auto data_str = json_data.dump();
-
-    auto res = cli.Post("/inferences/server/unloadmodel", httplib::Headers(),
-                        data_str.data(), data_str.size(), "application/json");
-    if (res) {
-      if (res->status == httplib::StatusCode::OK_200) {
-        // LOG_INFO << res->body;
-        CLI_LOG("Model unloaded!");
-      } else {
-        CLI_LOG("Error: could not unload model - " << res->status);
-      }
-    } else {
-      auto err = res.error();
-      CTL_ERR("HTTP error: " << httplib::to_string(err));
-    }
-  } catch (const std::exception& e) {
-    CLI_LOG("Fail to stop model information with ID '" + model_handle +
-            "': " + e.what());
+  if (res.has_error()) {
+    CLI_LOG("Error: " + res.error());
+    return;
   }
+  CLI_LOG("Model unloaded!");
 }
 
 }; // namespace commands

engine/commands/run_cmd.cc

Lines changed: 3 additions & 3 deletions
@@ -72,16 +72,16 @@ void RunCmd::Exec(bool chat_flag) {
   // If it is llamacpp, then check model status first
   {
     if ((mc.engine.find("llamacpp") == std::string::npos) ||
-        !commands::ModelStatusCmd().IsLoaded(host_, port_, mc)) {
-      if (!ModelStartCmd().Exec(host_, port_, mc)) {
+        !commands::ModelStatusCmd().IsLoaded(host_, port_, model_handle_)) {
+      if (!ModelStartCmd().Exec(host_, port_, model_handle_)) {
         return;
       }
     }
   }
 
   // Chat
   if (chat_flag) {
-    ChatCompletionCmd().Exec(host_, port_, mc, "");
+    ChatCompletionCmd().Exec(host_, port_, model_handle_, mc, "");
   } else {
     CLI_LOG(*model_id << " model started successfully. Use `"
             << commands::GetCortexBinary() << " chat " << *model_id

engine/controllers/models.cc

Lines changed: 49 additions & 3 deletions
@@ -226,9 +226,8 @@ void Models::ImportModel(
                            std::filesystem::path("imported") /
                            std::filesystem::path(modelHandle + ".yml"))
                               .string();
-    cortex::db::ModelEntry model_entry{
-        modelHandle, "local", "imported",
-        model_yaml_path, modelHandle};
+    cortex::db::ModelEntry model_entry{modelHandle, "local", "imported",
+                                       model_yaml_path, modelHandle};
     try {
       std::filesystem::create_directories(
           std::filesystem::path(model_yaml_path).parent_path());
@@ -331,3 +330,50 @@ void Models::SetModelAlias(
   callback(resp);
 }
 }
+
+void Models::StartModel(
+    const HttpRequestPtr& req,
+    std::function<void(const HttpResponsePtr&)>&& callback) {
+  if (!http_util::HasFieldInReq(req, callback, "model"))
+    return;
+  auto config = file_manager_utils::GetCortexConfig();
+  auto model_handle = (*(req->getJsonObject())).get("model", "").asString();
+  auto result = model_service_.StartModel(
+      config.apiServerHost, std::stoi(config.apiServerPort), model_handle);
+  if (result.has_error()) {
+    Json::Value ret;
+    ret["message"] = result.error();
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(drogon::k400BadRequest);
+    callback(resp);
+  } else {
+    Json::Value ret;
+    ret["message"] = "Started successfully!";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k200OK);
+    callback(resp);
+  }
+}
+
+void Models::StopModel(const HttpRequestPtr& req,
+                       std::function<void(const HttpResponsePtr&)>&& callback) {
+  if (!http_util::HasFieldInReq(req, callback, "model"))
+    return;
+  auto config = file_manager_utils::GetCortexConfig();
+  auto model_handle = (*(req->getJsonObject())).get("model", "").asString();
+  auto result = model_service_.StopModel(
+      config.apiServerHost, std::stoi(config.apiServerPort), model_handle);
+  if (result.has_error()) {
+    Json::Value ret;
+    ret["message"] = result.error();
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(drogon::k400BadRequest);
+    callback(resp);
+  } else {
+    Json::Value ret;
+    ret["message"] = "Started successfully!";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k200OK);
+    callback(resp);
+  }
+}
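The controller gains HTTP handlers for starting and stopping models by handle. Below is a usage sketch with cpp-httplib, which this codebase already depends on; the route paths, host, port, and model handle are assumptions, since the Drogon route registration lives in models.h, which is not part of this excerpt. The handlers only require a JSON body with a "model" field.

// Usage sketch only -- "/v1/models/start", 127.0.0.1:39281, and the handle
// "tinyllama:1b" are hypothetical; adjust to the routes actually registered.
#include <iostream>
#include "httplib.h"
#include "nlohmann/json.hpp"

int main() {
  httplib::Client cli("127.0.0.1:39281");

  nlohmann::json body;
  body["model"] = "tinyllama:1b";  // hypothetical model handle
  auto data = body.dump();

  auto res = cli.Post("/v1/models/start", httplib::Headers(), data.data(),
                      data.size(), "application/json");
  if (res && res->status == 200) {
    std::cout << res->body << std::endl;  // {"message":"Started successfully!"}
  } else if (res) {
    std::cout << "Error " << res->status << ": " << res->body << std::endl;
  }
  return 0;
}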
