
Commit 44d831f

fix: UI/UX improvement for cortex CLI (#1351)
* fix: CLI
* fix: remove chat-completion command

Co-authored-by: vansangpfiev <[email protected]>
1 parent a5a1877 commit 44d831f

8 files changed: +201 −154 lines changed
engine/commands/chat_cmd.cc

Lines changed: 4 additions & 131 deletions
@@ -1,144 +1,17 @@
 #include "chat_cmd.h"
 #include "httplib.h"
 
-#include "cortex_upd_cmd.h"
 #include "database/models.h"
 #include "model_status_cmd.h"
 #include "server_start_cmd.h"
 #include "trantor/utils/Logger.h"
 #include "utils/logging_utils.h"
+#include "run_cmd.h"
 
 namespace commands {
-namespace {
-constexpr const char* kExitChat = "exit()";
-constexpr const auto kMinDataChunkSize = 6u;
-constexpr const char* kUser = "user";
-constexpr const char* kAssistant = "assistant";
-
-} // namespace
-
-struct ChunkParser {
-  std::string content;
-  bool is_done = false;
-
-  ChunkParser(const char* data, size_t data_length) {
-    if (data && data_length > kMinDataChunkSize) {
-      std::string s(data + kMinDataChunkSize, data_length - kMinDataChunkSize);
-      if (s.find("[DONE]") != std::string::npos) {
-        is_done = true;
-      } else {
-        try {
-          content = nlohmann::json::parse(s)["choices"][0]["delta"]["content"];
-        } catch (const nlohmann::json::parse_error& e) {
-          CTL_WRN("JSON parse error: " << e.what());
-        }
-      }
-    }
-  }
-};
-
 void ChatCmd::Exec(const std::string& host, int port,
-                   const std::string& model_handle, std::string msg) {
-  cortex::db::Models modellist_handler;
-  config::YamlHandler yaml_handler;
-  try {
-    auto model_entry = modellist_handler.GetModelInfo(model_handle);
-    if (model_entry.has_error()) {
-      CLI_LOG("Error: " + model_entry.error());
-      return;
-    }
-    yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml);
-    auto mc = yaml_handler.GetModelConfig();
-    Exec(host, port, mc, std::move(msg));
-  } catch (const std::exception& e) {
-    CLI_LOG("Fail to start model information with ID '" + model_handle +
-            "': " + e.what());
-  }
+                   const std::string& model_handle) {
+  RunCmd rc(host, port, model_handle);
+  rc.Exec(true /*chat_flag*/);
 }
-
-void ChatCmd::Exec(const std::string& host, int port,
-                   const config::ModelConfig& mc, std::string msg) {
-  auto address = host + ":" + std::to_string(port);
-  // Check if server is started
-  {
-    if (!commands::IsServerAlive(host, port)) {
-      CLI_LOG("Server is not started yet, please run `"
-              << commands::GetCortexBinary() << " start` to start server!");
-      return;
-    }
-  }
-
-  // Only check if llamacpp engine
-  if ((mc.engine.find("llamacpp") != std::string::npos) &&
-      !commands::ModelStatusCmd().IsLoaded(host, port, mc)) {
-    CLI_LOG("Model is not loaded yet!");
-    return;
-  }
-
-  // Interactive mode or not
-  bool interactive = msg.empty();
-
-  // Some instruction for user here
-  if (interactive) {
-    std::cout << "Inorder to exit, type `exit()`" << std::endl;
-  }
-  // Model is loaded, start to chat
-  {
-    do {
-      std::string user_input = std::move(msg);
-      if (user_input.empty()) {
-        std::cout << "> ";
-        std::getline(std::cin, user_input);
-      }
-      if (user_input == kExitChat) {
-        break;
-      }
-
-      if (!user_input.empty()) {
-        httplib::Client cli(address);
-        nlohmann::json json_data;
-        nlohmann::json new_data;
-        new_data["role"] = kUser;
-        new_data["content"] = user_input;
-        histories_.push_back(std::move(new_data));
-        json_data["engine"] = mc.engine;
-        json_data["messages"] = histories_;
-        json_data["model"] = mc.name;
-        //TODO: support non-stream
-        json_data["stream"] = true;
-        json_data["stop"] = mc.stop;
-        auto data_str = json_data.dump();
-        // std::cout << data_str << std::endl;
-        cli.set_read_timeout(std::chrono::seconds(60));
-        // std::cout << "> ";
-        httplib::Request req;
-        req.headers = httplib::Headers();
-        req.set_header("Content-Type", "application/json");
-        req.method = "POST";
-        req.path = "/v1/chat/completions";
-        req.body = data_str;
-        std::string ai_chat;
-        req.content_receiver = [&](const char* data, size_t data_length,
-                                   uint64_t offset, uint64_t total_length) {
-          ChunkParser cp(data, data_length);
-          if (cp.is_done) {
-            std::cout << std::endl;
-            return false;
-          }
-          std::cout << cp.content << std::flush;
-          ai_chat += cp.content;
-          return true;
-        };
-        cli.send(req);
-
-        nlohmann::json ai_res;
-        ai_res["role"] = kAssistant;
-        ai_res["content"] = ai_chat;
-        histories_.push_back(std::move(ai_res));
-      }
-      // std::cout << "ok Done" << std::endl;
-    } while (interactive);
-  }
-}
-
 }; // namespace commands

engine/commands/chat_cmd.h

Lines changed: 1 addition & 10 deletions
@@ -1,18 +1,9 @@
 #pragma once
 #include <string>
-#include <vector>
-#include "config/model_config.h"
-#include "nlohmann/json.hpp"
 
 namespace commands {
 class ChatCmd {
  public:
-  void Exec(const std::string& host, int port, const std::string& model_handle,
-            std::string msg);
-  void Exec(const std::string& host, int port, const config::ModelConfig& mc,
-            std::string msg);
-
- private:
-  std::vector<nlohmann::json> histories_;
+  void Exec(const std::string& host, int port, const std::string& model_handle);
 };
 } // namespace commands
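With this change ChatCmd keeps a single entry point and simply forwards to RunCmd. The sketch below shows a hypothetical call site (not code from this commit); the host, port, and model handle are placeholder values.

#include "chat_cmd.h"  // engine/commands/chat_cmd.h

void LaunchInteractiveChat() {
  // Placeholder host/port/model values, for illustration only.
  // Internally this now constructs RunCmd(host, port, model_handle) and
  // calls Exec(true /*chat_flag*/), so chat reuses the model start-up path.
  commands::ChatCmd().Exec("127.0.0.1", 3928, "my-model");
}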

engine/commands/chat_completion_cmd.cc

Lines changed: 145 additions & 0 deletions
@@ -0,0 +1,145 @@
+#include "chat_completion_cmd.h"
+#include "httplib.h"
+
+#include "cortex_upd_cmd.h"
+#include "database/models.h"
+#include "model_status_cmd.h"
+#include "server_start_cmd.h"
+#include "trantor/utils/Logger.h"
+#include "utils/logging_utils.h"
+#include "run_cmd.h"
+
+namespace commands {
+namespace {
+constexpr const char* kExitChat = "exit()";
+constexpr const auto kMinDataChunkSize = 6u;
+constexpr const char* kUser = "user";
+constexpr const char* kAssistant = "assistant";
+
+} // namespace
+
+struct ChunkParser {
+  std::string content;
+  bool is_done = false;
+
+  ChunkParser(const char* data, size_t data_length) {
+    if (data && data_length > kMinDataChunkSize) {
+      std::string s(data + kMinDataChunkSize, data_length - kMinDataChunkSize);
+      if (s.find("[DONE]") != std::string::npos) {
+        is_done = true;
+      } else {
+        try {
+          content = nlohmann::json::parse(s)["choices"][0]["delta"]["content"];
+        } catch (const nlohmann::json::parse_error& e) {
+          CTL_WRN("JSON parse error: " << e.what());
+        }
+      }
+    }
+  }
+};
+
+void ChatCompletionCmd::Exec(const std::string& host, int port,
+                             const std::string& model_handle, std::string msg) {
+  cortex::db::Models modellist_handler;
+  config::YamlHandler yaml_handler;
+  try {
+    auto model_entry = modellist_handler.GetModelInfo(model_handle);
+    if (model_entry.has_error()) {
+      CLI_LOG("Error: " + model_entry.error());
+      return;
+    }
+    yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml);
+    auto mc = yaml_handler.GetModelConfig();
+    Exec(host, port, mc, std::move(msg));
+  } catch (const std::exception& e) {
+    CLI_LOG("Fail to start model information with ID '" + model_handle +
+            "': " + e.what());
+  }
+}
+
+void ChatCompletionCmd::Exec(const std::string& host, int port,
+                             const config::ModelConfig& mc, std::string msg) {
+  auto address = host + ":" + std::to_string(port);
+  // Check if server is started
+  {
+    if (!commands::IsServerAlive(host, port)) {
+      CLI_LOG("Server is not started yet, please run `"
+              << commands::GetCortexBinary() << " start` to start server!");
+      return;
+    }
+  }
+
+  // Only check if llamacpp engine
+  if ((mc.engine.find("llamacpp") != std::string::npos) &&
+      !commands::ModelStatusCmd().IsLoaded(host, port, mc)) {
+    CLI_LOG("Model is not loaded yet!");
+    return;
+  }
+
+  // Interactive mode or not
+  bool interactive = msg.empty();
+
+  // Some instruction for user here
+  if (interactive) {
+    std::cout << "Inorder to exit, type `exit()`" << std::endl;
+  }
+  // Model is loaded, start to chat
+  {
+    do {
+      std::string user_input = std::move(msg);
+      if (user_input.empty()) {
+        std::cout << "> ";
+        std::getline(std::cin, user_input);
+      }
+      if (user_input == kExitChat) {
+        break;
+      }
+
+      if (!user_input.empty()) {
+        httplib::Client cli(address);
+        nlohmann::json json_data;
+        nlohmann::json new_data;
+        new_data["role"] = kUser;
+        new_data["content"] = user_input;
+        histories_.push_back(std::move(new_data));
+        json_data["engine"] = mc.engine;
+        json_data["messages"] = histories_;
+        json_data["model"] = mc.name;
+        //TODO: support non-stream
+        json_data["stream"] = true;
+        json_data["stop"] = mc.stop;
+        auto data_str = json_data.dump();
+        // std::cout << data_str << std::endl;
+        cli.set_read_timeout(std::chrono::seconds(60));
+        // std::cout << "> ";
+        httplib::Request req;
+        req.headers = httplib::Headers();
+        req.set_header("Content-Type", "application/json");
+        req.method = "POST";
+        req.path = "/v1/chat/completions";
+        req.body = data_str;
+        std::string ai_chat;
+        req.content_receiver = [&](const char* data, size_t data_length,
+                                   uint64_t offset, uint64_t total_length) {
+          ChunkParser cp(data, data_length);
+          if (cp.is_done) {
+            std::cout << std::endl;
+            return false;
+          }
+          std::cout << cp.content << std::flush;
+          ai_chat += cp.content;
+          return true;
+        };
+        cli.send(req);
+
+        nlohmann::json ai_res;
+        ai_res["role"] = kAssistant;
+        ai_res["content"] = ai_chat;
+        histories_.push_back(std::move(ai_res));
+      }
+      // std::cout << "ok Done" << std::endl;
+    } while (interactive);
+  }
+}
+
+}; // namespace commands
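ChatCompletionCmd streams the reply chunk by chunk: each chunk arrives as a server-sent-events line prefixed with "data: " (six characters, hence kMinDataChunkSize = 6u), and the stream ends with a "[DONE]" sentinel. The standalone sketch below walks through the same parsing ChunkParser performs; the sample payload is made up for illustration, not captured from a real server.

#include <cstddef>
#include <iostream>
#include <string>

#include "nlohmann/json.hpp"

int main() {
  // One illustrative streamed chunk in SSE form.
  std::string chunk = R"(data: {"choices":[{"delta":{"content":"Hello"}}]})";
  constexpr std::size_t kMinDataChunkSize = 6;  // length of the "data: " prefix

  std::string payload = chunk.substr(kMinDataChunkSize);
  if (payload.find("[DONE]") != std::string::npos) {
    std::cout << "[stream finished]" << std::endl;
  } else {
    // Same field path ChunkParser reads: choices[0].delta.content
    std::string content =
        nlohmann::json::parse(payload)["choices"][0]["delta"]["content"];
    std::cout << content << std::endl;  // prints: Hello
  }
  return 0;
}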
engine/commands/chat_completion_cmd.h

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+#pragma once
+#include <string>
+#include <vector>
+#include "config/model_config.h"
+#include "nlohmann/json.hpp"
+
+namespace commands {
+class ChatCompletionCmd {
+ public:
+  void Exec(const std::string& host, int port, const std::string& model_handle,
+            std::string msg);
+  void Exec(const std::string& host, int port, const config::ModelConfig& mc,
+            std::string msg);
+
+ private:
+  std::vector<nlohmann::json> histories_;
+};
+} // namespace commands
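The two overloads preserve the previous ChatCmd behaviour: a non-empty msg sends a single prompt, while an empty msg enters the interactive loop until the user types exit(). A minimal sketch of both call styles follows; the call sites and the host, port, and model values are placeholders, not code from this commit.

#include "chat_completion_cmd.h"

void OneShot() {
  // Sends a single message and prints the streamed reply.
  commands::ChatCompletionCmd().Exec("127.0.0.1", 3928, "my-model",
                                     "Hello, who are you?");
}

void Interactive() {
  // Empty message -> interactive shell until `exit()` is typed.
  commands::ChatCompletionCmd().Exec("127.0.0.1", 3928, "my-model", "");
}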

engine/commands/run_cmd.cc

Lines changed: 11 additions & 3 deletions
@@ -1,15 +1,17 @@
 #include "run_cmd.h"
-#include "chat_cmd.h"
+#include "chat_completion_cmd.h"
 #include "config/yaml_config.h"
 #include "database/models.h"
 #include "model_start_cmd.h"
 #include "model_status_cmd.h"
 #include "server_start_cmd.h"
 #include "utils/logging_utils.h"
 
+#include "cortex_upd_cmd.h"
+
 namespace commands {
 
-void RunCmd::Exec() {
+void RunCmd::Exec(bool chat_flag) {
   std::optional<std::string> model_id = model_handle_;
 
   cortex::db::Models modellist_handler;
@@ -78,7 +80,13 @@ void RunCmd::Exec() {
     }
 
     // Chat
-    ChatCmd().Exec(host_, port_, mc, "");
+    if (chat_flag) {
+      ChatCompletionCmd().Exec(host_, port_, mc, "");
+    } else {
+      CLI_LOG(*model_id << " model started successfully. Use `"
+              << commands::GetCortexBinary() << " chat " << *model_id
+              << "` for interactive chat shell");
+    }
   } catch (const std::exception& e) {
     CLI_LOG("Fail to run model with ID '" + model_handle_ + "': " + e.what());
   }
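RunCmd::Exec now takes a chat_flag: when true (the ChatCmd path) it hands off to ChatCompletionCmd for the interactive shell, and when false it only starts the model and logs a hint to run `<cortex binary> chat <model>`. A minimal sketch of the two call paths, assuming hypothetical wrapper functions and placeholder connection values:

#include <string>

#include "run_cmd.h"

void StartModelOnly(const std::string& host, int port, const std::string& model) {
  // Logs: "<model> model started successfully. Use `... chat <model>` ..."
  commands::RunCmd(host, port, model).Exec(false /*chat_flag*/);
}

void StartModelAndChat(const std::string& host, int port, const std::string& model) {
  // Same start-up flow, then drops into the interactive chat shell.
  commands::RunCmd(host, port, model).Exec(true /*chat_flag*/);
}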

engine/commands/run_cmd.h

Lines changed: 2 additions & 1 deletion
@@ -1,5 +1,6 @@
 #pragma once
 #include <string>
+#include "nlohmann/json.hpp"
 #include "services/engine_service.h"
 #include "services/model_service.h"
 
@@ -12,7 +13,7 @@ class RunCmd {
         model_handle_{std::move(model_handle)},
         model_service_{ModelService()} {};
 
-  void Exec();
+  void Exec(bool chat_flag);
 
  private:
   std::string host_;
