
Commit 3f1ea0b

update
1 parent e209137 commit 3f1ea0b

9 files changed: +218 -161 lines changed
engine/commands/engine_get_cmd.cc

Lines changed: 16 additions & 52 deletions
@@ -1,64 +1,28 @@
 #include "engine_get_cmd.h"
 #include <iostream>
 #include <tabulate/table.hpp>
-#include "utils/file_manager_utils.h"
+#include "services/engine_service.h"
 #include "utils/logging_utils.h"

 namespace commands {

 void EngineGetCmd::Exec() const {
-  CTL_INF("[EneingeGetCmd] engine: " << engine_);
+  CTL_INF("[EngineGetCmd] engine: " << engine_);

-  auto ecp = file_manager_utils::GetEnginesContainerPath();
-  std::string onnx_status{"not_supported"};
-  std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp")
-                                    ? "ready"
-                                    : "not_initialized";
-  std::string tensorrt_status{"not_supported"};
-
-#ifdef _WIN32
-  onnx_status = std::filesystem::exists(ecp / "cortex.onnx")
-                    ? "ready"
-                    : "not_initialized";
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? "ready"
-                        : "not_initialized";
-#elif defined(__linux__)
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? "ready"
-                        : "not_initialized";
-#endif
-  std::vector<EngineInfo> engines = {
-      {.name = "cortex.onnx",
-       .description = "This extension enables chat completion API calls using "
-                      "the Onnx engine",
-       .version = "0.0.1",
-       .product_name = "Onnx Inference Engine",
-       .status = onnx_status},
-      {.name = "cortex.llamacpp",
-       .description = "This extension enables chat completion API calls using "
-                      "the LlamaCPP engine",
-       .version = "0.0.1",
-       .product_name = "LlamaCPP Inference Engine",
-       .status = llamacpp_status},
-      {.name = "cortex.tensorrt-llm",
-       .description = "This extension enables chat completion API calls using "
-                      "the TensorrtLLM engine",
-       .version = "0.0.1",
-       .product_name = "TensorrtLLM Inference Engine",
-       .status = tensorrt_status},
-  };
-
-  tabulate::Table table;
-  table.add_row({"name", "description", "version", "product name", "status"});
-  table.format().font_color(tabulate::Color::green);
-  for (auto& engine : engines) {
-    if (engine.name == engine_) {
-      table.add_row({engine.name, engine.description, engine.version,
-                     engine.product_name, engine.status});
-    }
+  auto engine_service = EngineService();
+  try {
+    auto status = engine_service.GetEngineInfo(engine_);
+    tabulate::Table table;
+    table.add_row({"name", "description", "version", "product name", "status"});
+    table.format().font_color(tabulate::Color::green);
+    table.add_row({status.name, status.description, status.version,
+                   status.product_name, status.status});
+    std::cout << table << std::endl;
+  } catch (const std::runtime_error& e) {
+    std::cerr << "Engine " << engine_ << " is not supported!" << "\n";
+  } catch (const std::exception& e) {
+    std::cerr << "Failed to get engine info for " << engine_ << ": " << e.what()
+              << "\n";
   }
-
-  std::cout << table << std::endl;
 }
 }; // namespace commands

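The command now delegates to EngineService. Its header, services/engine_service.h, is among the nine files changed in this commit but is not displayed in this excerpt, so what follows is only a sketch of the interface implied by the call sites: the field names come from the status.name, status.description, status.version, status.product_name and status.status accesses, GetEngineInfoList() is called by engine_list_cmd.cc further down, and the exception behaviour is inferred from the catch blocks above. Exact types, nesting and signatures are assumptions.

// Sketch only: inferred from how EngineGetCmd and EngineListCmd use the
// service; the real services/engine_service.h may nest EngineInfo inside
// EngineService and may differ in detail.
#pragma once

#include <string>
#include <vector>

struct EngineInfo {
  std::string name;
  std::string description;
  std::string version;
  std::string product_name;
  std::string status;  // e.g. "ready", "not_initialized", "not_supported"
};

class EngineService {
 public:
  // Assumed to throw std::runtime_error for an unsupported engine name;
  // EngineGetCmd maps that case to the "is not supported!" message.
  EngineInfo GetEngineInfo(const std::string& engine) const;

  // One entry per supported engine, in the order the list command prints.
  std::vector<EngineInfo> GetEngineInfoList() const;
};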
engine/commands/engine_get_cmd.h

Lines changed: 1 addition & 9 deletions
@@ -1,16 +1,9 @@
 #pragma once
+
 #include <string>

 namespace commands {
 class EngineGetCmd {
-  struct EngineInfo {
-    std::string name;
-    std::string description;
-    std::string version;
-    std::string product_name;
-    std::string status;
-  };
-
  public:
   EngineGetCmd(const std::string& engine) : engine_{engine} {};

@@ -19,5 +12,4 @@ class EngineGetCmd {
  private:
   std::string engine_;
 };
-
 } // namespace commands

engine/commands/engine_list_cmd.cc

Lines changed: 10 additions & 37 deletions
@@ -1,49 +1,23 @@
 #include "engine_list_cmd.h"
-#include <filesystem>
 #include <tabulate/table.hpp>
-#include "utils/file_manager_utils.h"
+#include "services/engine_service.h"

 namespace commands {

 bool EngineListCmd::Exec() {
-  auto ecp = file_manager_utils::GetEnginesContainerPath();
-  std::string onnx_status{"not_supported"};
-  std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp")
-                                    ? "ready"
-                                    : "not_initialized";
-  std::string tensorrt_status{"not_supported"};
-#ifdef _WIN32
-  onnx_status = std::filesystem::exists(ecp / "cortex.onnx")
-                    ? "ready"
-                    : "not_initialized";
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? "ready"
-                        : "not_initialized";
-#elif defined(__linux__)
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? "ready"
-                        : "not_initialized";
-#endif
+  auto engine_service = EngineService();
+  auto status_list = engine_service.GetEngineInfoList();

   tabulate::Table table;
-  table.add_row(
-      {"(Index)", "name", "description", "version", "product name", "status"});
   table.format().font_color(tabulate::Color::green);
   table.add_row(
-      {"1", "cortex.onnx",
-       "This extension enables chat completion API calls using the Onnx engine",
-       "0.0.1", "Onnx Inference Engine", onnx_status});
-
-  table.add_row({"2", "cortex.llamacpp",
-                 "This extension enables chat completion API calls using the "
-                 "LlamaCPP engine",
-                 "0.0.1", "LlamaCPP Inference Engine", llamacpp_status});
-
-  // tensorrt llm
-  table.add_row({"3", "cortex.tensorrt-llm",
-                 "This extension enables chat completion API calls using the "
-                 "TensorrtLLM engine",
-                 "0.0.1", "TensorrtLLM Inference Engine", tensorrt_status});
+      {"(Index)", "name", "description", "version", "product name", "status"});
+  for (int i = 0; i < status_list.size(); i++) {
+    auto status = status_list[i];
+    std::string index = std::to_string(i + 1);
+    table.add_row({index, status.name, status.description, status.version,
+                   status.product_name, status.status});
+  }

   for (int i = 0; i < 6; i++) {
     table[0][i]
@@ -62,5 +36,4 @@ bool EngineListCmd::Exec() {
   std::cout << table << std::endl;
   return true;
 }
-
 }; // namespace commands

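For reference, a minimal standalone sketch that reproduces the table layout the refactored list command prints, using the same tabulate calls as the diff above. The data row is illustrative only and was not captured from a real run; in the command itself every row comes from EngineService::GetEngineInfoList().

#include <iostream>
#include <tabulate/table.hpp>

int main() {
  tabulate::Table table;
  // Same header row and colour as engine_list_cmd.cc above.
  table.add_row(
      {"(Index)", "name", "description", "version", "product name", "status"});
  table.format().font_color(tabulate::Color::green);
  // Illustrative row only; the command fills these cells from the service.
  table.add_row({"1", "cortex.llamacpp",
                 "This extension enables chat completion API calls using the "
                 "LlamaCPP engine",
                 "0.0.1", "LlamaCPP Inference Engine", "ready"});
  std::cout << table << std::endl;
  return 0;
}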
engine/controllers/command_line_parser.cc

Lines changed: 5 additions & 2 deletions
@@ -13,6 +13,7 @@
 #include "commands/run_cmd.h"
 #include "commands/server_stop_cmd.h"
 #include "config/yaml_config.h"
+#include "services/engine_service.h"
 #include "utils/cortex_utils.h"
 #include "utils/logging_utils.h"

@@ -185,11 +186,13 @@ void CommandLineParser::EngineManagement(CLI::App* parent,

 void CommandLineParser::EngineGet(CLI::App* parent) {
   auto get_cmd = parent->add_subcommand("get", "Get an engine info");
+  auto engine_service = EngineService();

-  for (auto& engine : supportedEngines_) {
+  for (auto& engine : engine_service.kSupportEngines) {
     std::string engine_name{engine};
     std::string desc = "Get " + engine_name + " status";
-    auto engine_get_cmd = get_cmd->add_subcommand(engine, desc);
+
+    auto engine_get_cmd = get_cmd->add_subcommand(engine_name, desc);
     engine_get_cmd->callback([engine_name] {
       commands::EngineGetCmd cmd(engine_name);
       cmd.Exec();

engine/controllers/command_line_parser.h

Lines changed: 0 additions & 5 deletions
@@ -1,6 +1,5 @@
 #pragma once

-#include <array>
 #include "CLI/CLI.hpp"

 class CommandLineParser {
@@ -15,8 +14,4 @@ class CommandLineParser {
   void EngineGet(CLI::App* parent);

   CLI::App app_;
-
-  // TODO: move this one to somewhere else
-  static constexpr std::array<const char*, 3> supportedEngines_ = {
-      "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"};
 };

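The supportedEngines_ array removed from command_line_parser.h above is the list the parser previously iterated; after this commit the loop in command_line_parser.cc iterates engine_service.kSupportEngines instead. A sketch of how that member presumably looks inside EngineService, assuming it keeps the std::array type and the same three entries as the constant it replaces:

// Sketch only: assumes kSupportEngines mirrors the removed supportedEngines_.
#include <array>

class EngineService {
 public:
  static constexpr std::array<const char*, 3> kSupportEngines = {
      "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"};
};

Accessing a static member through an instance, as the parser loop does with engine_service.kSupportEngines, is valid C++, so either a static or a non-static member would compile at that call site.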
engine/controllers/engines.cc

Lines changed: 79 additions & 51 deletions
@@ -1,11 +1,17 @@
 #include "engines.h"
+#include <filesystem>
+#include <sstream>
+#include <stdexcept>
+#include <utility>
+#include "services/engine_service.h"
 #include "utils/archive_utils.h"
 #include "utils/cortex_utils.h"
 #include "utils/system_info_utils.h"

-void Engines::InitEngine(const HttpRequestPtr& req,
-                         std::function<void(const HttpResponsePtr&)>&& callback,
-                         const std::string& engine) const {
+void Engines::InstallEngine(
+    const HttpRequestPtr& req,
+    std::function<void(const HttpResponsePtr&)>&& callback,
+    const std::string& engine) const {
   LOG_DEBUG << "InitEngine, Engine: " << engine;
   if (engine.empty()) {
     Json::Value res;
@@ -114,62 +120,84 @@ void Engines::InitEngine(const HttpRequestPtr& req,
 void Engines::ListEngine(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) const {
+  auto engine_service = EngineService();
+  auto status_list = engine_service.GetEngineInfoList();
+
   Json::Value ret;
   ret["object"] = "list";
   Json::Value data(Json::arrayValue);
-  Json::Value obj_onnx, obj_llamacpp, obj_tensorrt;
-  obj_onnx["name"] = "cortex.onnx";
-  obj_onnx["description"] =
-      "This extension enables chat completion API calls using the Onnx engine";
-  obj_onnx["version"] = "0.0.1";
-  obj_onnx["productName"] = "Onnx Inference Engine";
-
-  obj_llamacpp["name"] = "cortex.llamacpp";
-  obj_llamacpp["description"] =
-      "This extension enables chat completion API calls using the LlamaCPP "
-      "engine";
-  obj_llamacpp["version"] = "0.0.1";
-  obj_llamacpp["productName"] = "LlamaCPP Inference Engine";
-
-  obj_tensorrt["name"] = "cortex.tensorrt-llm";
-  obj_tensorrt["description"] =
-      "This extension enables chat completion API calls using the TensorrtLLM "
-      "engine";
-  obj_tensorrt["version"] = "0.0.1";
-  obj_tensorrt["productName"] = "TensorrtLLM Inference Engine";
-
-#ifdef _WIN32
-  if (std::filesystem::exists(std::filesystem::current_path().string() +
-                              cortex_utils::kOnnxLibPath)) {
-    obj_onnx["status"] = "ready";
-  } else {
-    obj_onnx["status"] = "not_initialized";
-  }
-#else
-  obj_onnx["status"] = "not_supported";
-#endif
-  // lllamacpp
-  if (std::filesystem::exists(std::filesystem::current_path().string() +
-                              cortex_utils::kLlamaLibPath)) {
-
-    obj_llamacpp["status"] = "ready";
-  } else {
-    obj_llamacpp["status"] = "not_initialized";
-  }
-  // tensorrt llm
-  if (std::filesystem::exists(std::filesystem::current_path().string() +
-                              cortex_utils::kTensorrtLlmPath)) {
-    obj_tensorrt["status"] = "ready";
-  } else {
-    obj_tensorrt["status"] = "not_initialized";
+  for (auto& status : status_list) {
+    Json::Value ret;
+    ret["name"] = status.name;
+    ret["description"] = status.description;
+    ret["version"] = status.version;
+    ret["productName"] = status.product_name;
+    ret["status"] = status.status;
+
+    data.append(std::move(ret));
   }

-  data.append(std::move(obj_onnx));
-  data.append(std::move(obj_llamacpp));
-  data.append(std::move(obj_tensorrt));
   ret["data"] = data;
   ret["result"] = "OK";
   auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
   resp->setStatusCode(k200OK);
   callback(resp);
 }
+
+void Engines::GetEngine(const HttpRequestPtr& req,
+                        std::function<void(const HttpResponsePtr&)>&& callback,
+                        const std::string& engine) const {
+  auto engine_service = EngineService();
+  try {
+    auto status = engine_service.GetEngineInfo(engine);
+    Json::Value ret;
+    ret["name"] = status.name;
+    ret["description"] = status.description;
+    ret["version"] = status.version;
+    ret["productName"] = status.product_name;
+    ret["status"] = status.status;
+
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k200OK);
+    callback(resp);
+  } catch (const std::runtime_error e) {
+    Json::Value ret;
+    ret["message"] = e.what();
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k400BadRequest);
+    callback(resp);
+  } catch (const std::exception& e) {
+    Json::Value ret;
+    ret["message"] = e.what();
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k500InternalServerError);
+    callback(resp);
+  }
+}
+
+void Engines::UninstallEngine(
+    const HttpRequestPtr& req,
+    std::function<void(const HttpResponsePtr&)>&& callback,
+    const std::string& engine) const {
+  LOG_INFO << "[Http] Uninstall engine " << engine;
+  // TODO: think of a way to prevent code duplication. This should be shared with cmd as well
+
+  // TODO: Unload the model which is currently running on engine_
+
+  // TODO: Unload engine if is loaded
+
+  // auto ecp = file_manager_utils::GetEnginesContainerPath();
+  // auto engine_path = ecp / engine;
+  // if (!std::filesystem::exists(engine_path)) {
+  //   ("Engine " << engine_ << " is not installed!");
+  //   return;
+  // }
+  //
+  // // remove
+  // try {
+  //   std::filesystem::remove_all(engine_path);
+  //   CTL_INF("Engine " << engine_ << " uninstalled successfully!");
+  // } catch (const std::exception& e) {
+  //   CTL_ERR("Failed to uninstall engine " << engine_ + ": " << e.what());
+  // }
+}

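engines.h is presumably also updated by this commit (InitEngine is renamed to InstallEngine, and GetEngine and UninstallEngine are new handlers), but it is not shown in this excerpt. Below is a sketch of the controller declarations implied by the definitions above; the parameter lists are copied from the .cc, while the drogon base class and header are assumptions based on the HttpRequestPtr/HttpResponsePtr types and status codes used here.

// Sketch only: the real engines.h (not shown) also wires these handlers to
// HTTP routes, which this sketch omits.
#include <drogon/HttpController.h>

#include <functional>
#include <string>

using namespace drogon;  // assumption: matches the existing controller style

class Engines : public drogon::HttpController<Engines> {
 public:
  void InstallEngine(const HttpRequestPtr& req,
                     std::function<void(const HttpResponsePtr&)>&& callback,
                     const std::string& engine) const;
  void ListEngine(const HttpRequestPtr& req,
                  std::function<void(const HttpResponsePtr&)>&& callback) const;
  void GetEngine(const HttpRequestPtr& req,
                 std::function<void(const HttpResponsePtr&)>&& callback,
                 const std::string& engine) const;
  void UninstallEngine(const HttpRequestPtr& req,
                       std::function<void(const HttpResponsePtr&)>&& callback,
                       const std::string& engine) const;
};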