This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 3f7d3ec

fix: should check model status before start it (#1277)
* fix: check model status before start
* fix: read timeout for checking update
* fix: only set logger for engine once
1 parent d9c5b41 commit 3f7d3ec

File tree

7 files changed (+81, -38 lines)


engine/commands/chat_cmd.cc

Lines changed: 7 additions & 24 deletions

@@ -2,9 +2,10 @@
 #include "httplib.h"
 
 #include "cortex_upd_cmd.h"
+#include "model_status_cmd.h"
+#include "server_start_cmd.h"
 #include "trantor/utils/Logger.h"
 #include "utils/logging_utils.h"
-#include "server_start_cmd.h"
 
 namespace commands {
 namespace {
@@ -45,29 +46,11 @@ void ChatCmd::Exec(std::string msg) {
   }
 
   auto address = host_ + ":" + std::to_string(port_);
-  // Check if model is loaded
-  // TODO(sang) only llamacpp support modelstatus for now
-  if (mc_.engine.find("llamacpp") != std::string::npos) {
-    httplib::Client cli(address);
-    nlohmann::json json_data;
-    json_data["model"] = mc_.name;
-    json_data["engine"] = mc_.engine;
-
-    auto data_str = json_data.dump();
-
-    // TODO: move this to another message?
-    auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
-                        data_str.data(), data_str.size(), "application/json");
-    if (res) {
-      if (res->status != httplib::StatusCode::OK_200) {
-        CTL_ERR(res->body);
-        return;
-      }
-    } else {
-      auto err = res.error();
-      CTL_ERR("HTTP error: " << httplib::to_string(err));
-      return;
-    }
+  // Only check if llamacpp engine
+  if ((mc_.engine.find("llamacpp") != std::string::npos) &&
+      !commands::ModelStatusCmd().IsLoaded(host_, port_, mc_)) {
+    CLI_LOG("Model is not loaded yet!");
+    return;
   }
 
   // Some instruction for user here

engine/commands/cortex_upd_cmd.h

Lines changed: 1 addition & 0 deletions

@@ -70,6 +70,7 @@ inline void CheckNewUpdate() {
 
   httplib::Client cli(host_name);
   cli.set_connection_timeout(kTimeoutCheckUpdate);
+  cli.set_read_timeout(kTimeoutCheckUpdate);
   if (auto res = cli.Get(release_path)) {
     if (res->status == httplib::StatusCode::OK_200) {
       try {
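The one-line change above matters because the connection timeout only bounds the TCP connect phase; without a read timeout, the update check can hang if the release server accepts the connection but is slow to answer. A minimal, self-contained sketch of the same idea with cpp-httplib (the host, path, and 10-second value are illustrative, not the project's actual host_name, release_path, or kTimeoutCheckUpdate):

#include <chrono>
#include <iostream>

#include "httplib.h"  // cpp-httplib; https hosts require CPPHTTPLIB_OPENSSL_SUPPORT

int main() {
  httplib::Client cli("https://api.github.com");
  // Bound both phases of the request: connecting and reading the response.
  cli.set_connection_timeout(std::chrono::seconds(10));
  cli.set_read_timeout(std::chrono::seconds(10));
  if (auto res = cli.Get("/repos/janhq/cortex.cpp/releases/latest")) {
    std::cout << "status: " << res->status << "\n";
  } else {
    std::cout << "request failed or timed out: "
              << httplib::to_string(res.error()) << "\n";
  }
  return 0;
}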

engine/commands/model_start_cmd.cc

Lines changed: 7 additions & 0 deletions

@@ -1,6 +1,7 @@
 #include "model_start_cmd.h"
 #include "cortex_upd_cmd.h"
 #include "httplib.h"
+#include "model_status_cmd.h"
 #include "nlohmann/json.hpp"
 #include "server_start_cmd.h"
 #include "trantor/utils/Logger.h"
@@ -19,6 +20,12 @@ bool ModelStartCmd::Exec() {
             << commands::GetCortexBinary() << " start` to start server!");
     return false;
   }
+  // Only check for llamacpp for now
+  if ((mc_.engine.find("llamacpp") != std::string::npos) &&
+      commands::ModelStatusCmd().IsLoaded(host_, port_, mc_)) {
+    CLI_LOG("Model has already been started!");
+    return true;
+  }
 
   httplib::Client cli(host_ + ":" + std::to_string(port_));
 

engine/commands/model_status_cmd.cc

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+#include "model_status_cmd.h"
+#include "config/yaml_config.h"
+#include "httplib.h"
+#include "nlohmann/json.hpp"
+#include "utils/logging_utils.h"
+
+namespace commands {
+bool ModelStatusCmd::IsLoaded(const std::string& host, int port,
+                              const config::ModelConfig& mc) {
+  httplib::Client cli(host + ":" + std::to_string(port));
+  nlohmann::json json_data;
+  json_data["model"] = mc.name;
+  json_data["engine"] = mc.engine;
+
+  auto data_str = json_data.dump();
+
+  auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
+                      data_str.data(), data_str.size(), "application/json");
+  if (res) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      return true;
+    }
+  } else {
+    auto err = res.error();
+    CTL_WRN("HTTP error: " << httplib::to_string(err));
+    return false;
+  }
+
+  return false;
+}
+}  // namespace commands

engine/commands/model_status_cmd.h

Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+#pragma once
+#include <string>
+#include "config/yaml_config.h"
+
+namespace commands {
+
+class ModelStatusCmd {
+ public:
+  bool IsLoaded(const std::string& host, int port,
+                const config::ModelConfig& mc);
+};
+}  // namespace commands
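The new ModelStatusCmd centralizes the status probe that chat_cmd.cc previously did inline, so ChatCmd, ModelStartCmd, and RunCmd can share one implementation. A hedged sketch of how a caller might use it, mirroring this commit's call sites (the NeedsStart helper is illustrative and not part of the commit; include paths are assumed relative to engine/commands, and mc is assumed to be a config::ModelConfig parsed from the model's YAML):

#include <string>

#include "config/yaml_config.h"
#include "model_status_cmd.h"

// Illustrative helper: decide whether a model still needs to be started.
bool NeedsStart(const std::string& host, int port,
                const config::ModelConfig& mc) {
  // Only llamacpp exposes /inferences/server/modelstatus for now,
  // so other engines are always (re)started.
  if (mc.engine.find("llamacpp") == std::string::npos) {
    return true;
  }
  return !commands::ModelStatusCmd().IsLoaded(host, port, mc);
}

This is the same decision run_cmd.cc now makes before constructing ModelStartCmd, and the inverse of the guard in ModelStartCmd::Exec that short-circuits with "Model has already been started!".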

engine/commands/run_cmd.cc

Lines changed: 12 additions & 5 deletions

@@ -3,6 +3,7 @@
 #include "cmd_info.h"
 #include "config/yaml_config.h"
 #include "model_start_cmd.h"
+#include "model_status_cmd.h"
 #include "server_start_cmd.h"
 #include "utils/file_manager_utils.h"
 
@@ -47,21 +48,27 @@ void RunCmd::Exec() {
     }
   }
 
-  // Start model
   config::YamlHandler yaml_handler;
   yaml_handler.ModelConfigFromFile(
       file_manager_utils::GetModelsContainerPath().string() + "/" + model_file +
       ".yaml");
+  auto mc = yaml_handler.GetModelConfig();
+
+  // Always start model if not llamacpp
+  // If it is llamacpp, then check model status first
   {
-    ModelStartCmd msc(host_, port_, yaml_handler.GetModelConfig());
-    if (!msc.Exec()) {
-      return;
+    if ((mc.engine.find("llamacpp") == std::string::npos) ||
+        !commands::ModelStatusCmd().IsLoaded(host_, port_, mc)) {
+      ModelStartCmd msc(host_, port_, mc);
+      if (!msc.Exec()) {
+        return;
+      }
     }
   }
 
   // Chat
   {
-    ChatCmd cc(host_, port_, yaml_handler.GetModelConfig());
+    ChatCmd cc(host_, port_, mc);
     cc.Exec("");
   }
 }

engine/controllers/server.cc

Lines changed: 11 additions & 9 deletions

@@ -342,20 +342,22 @@ void server::LoadModel(const HttpRequestPtr& req,
       auto func =
           engines_[engine_type].dl->get_function<EngineI*()>("get_engine");
       engines_[engine_type].engine = func();
+
+      auto& en = std::get<EngineI*>(engines_[engine_type].engine);
+      if (engine_type == kLlamaEngine) {  // fix for llamacpp engine first
+        auto config = file_manager_utils::GetCortexConfig();
+        if (en->IsSupported("SetFileLogger")) {
+          en->SetFileLogger(config.maxLogLines, config.logFolderPath + "/" +
+                                                    cortex_utils::logs_base_name);
+        } else {
+          LOG_WARN << "Method SetFileLogger is not supported yet";
+        }
+      }
       LOG_INFO << "Loaded engine: " << engine_type;
     }
 
     LOG_TRACE << "Load model";
     auto& en = std::get<EngineI*>(engines_[engine_type].engine);
-    if (engine_type == kLlamaEngine) {  // fix for llamacpp engine first
-      auto config = file_manager_utils::GetCortexConfig();
-      if (en->IsSupported("SetFileLogger")) {
-        en->SetFileLogger(config.maxLogLines, config.logFolderPath + "/" +
-                                                  cortex_utils::logs_base_name);
-      } else {
-        LOG_WARN << "Method SetFileLogger is not supported yet";
-      }
-    }
     en->LoadModel(req->getJsonObject(), [cb = std::move(callback)](
                       Json::Value status, Json::Value res) {
       auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
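This last hunk implements the "only set logger for engine once" part of the commit: the SetFileLogger call moves into the branch where the engine shared library is first loaded, so it runs once per engine instead of on every LoadModel request. A minimal, self-contained sketch of that configure-at-creation pattern (the types and names below are stand-ins, not cortex's actual classes):

#include <iostream>
#include <map>
#include <memory>
#include <string>

// Stand-in for the real engine interface.
struct Engine {
  void SetFileLogger(const std::string& path) {
    std::cout << "logger set to " << path << "\n";  // expected once per engine
  }
  void LoadModel(const std::string& model) {
    std::cout << "loading " << model << "\n";
  }
};

std::map<std::string, std::unique_ptr<Engine>> engines;

void HandleLoadModel(const std::string& engine_type, const std::string& model) {
  auto it = engines.find(engine_type);
  if (it == engines.end()) {
    // Engine is created for the first time: do one-time setup here.
    auto engine = std::make_unique<Engine>();
    engine->SetFileLogger("./logs/cortex.log");
    it = engines.emplace(engine_type, std::move(engine)).first;
  }
  // Per-request work only; the logger is not reconfigured.
  it->second->LoadModel(model);
}

int main() {
  HandleLoadModel("llamacpp", "tinyllama");  // sets logger, then loads
  HandleLoadModel("llamacpp", "mistral");    // reuses engine; logger untouched
  return 0;
}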
