Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 131b610

Browse files
authored
feat: cortex start command (#1235)
* feat: cortex start
* fix: print app.help
* feat: add -p, --port for cortex start
* fix: check if model is started before start model
* fix: correct binary name
* fix: e2e test
* fix: host, port from config
* fix: correct condition
* fix: correct url
* fix: add retry check health status
1 parent a152a1a commit 131b610

File tree

9 files changed

+219
-103
lines changed

9 files changed

+219
-103
lines changed

engine/commands/chat_cmd.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#include "chat_cmd.h"
22
#include "httplib.h"
33

4+
#include "cortex_upd_cmd.h"
45
#include "trantor/utils/Logger.h"
56
#include "utils/logging_utils.h"
7+
#include "server_start_cmd.h"
68

79
namespace commands {
810
namespace {
@@ -33,6 +35,15 @@ ChatCmd::ChatCmd(std::string host, int port, const config::ModelConfig& mc)
3335
: host_(std::move(host)), port_(port), mc_(mc) {}
3436

3537
void ChatCmd::Exec(std::string msg) {
38+
// Check if server is started
39+
{
40+
if (!commands::IsServerAlive(host_, port_)) {
41+
CLI_LOG("Server is not started yet, please run `"
42+
<< commands::GetCortexBinary() << " start` to start server!");
43+
return;
44+
}
45+
}
46+
3647
auto address = host_ + ":" + std::to_string(port_);
3748
// Check if model is loaded
3849
// TODO(sang) only llamacpp support modelstatus for now

engine/commands/cortex_upd_cmd.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ void CortexUpdCmd::Exec(std::string v) {
1515
{
1616
auto config = file_manager_utils::GetCortexConfig();
1717
httplib::Client cli(config.apiServerHost + ":" + config.apiServerPort);
18-
auto res = cli.Get("/health/healthz");
18+
auto res = cli.Get("/healthz");
1919
if (res) {
2020
CLI_LOG("Server is running. Stopping server before updating!");
2121
commands::ServerStopCmd ssc(config.apiServerHost,

engine/commands/model_start_cmd.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
#include "model_start_cmd.h"
2+
#include "cortex_upd_cmd.h"
23
#include "httplib.h"
34
#include "nlohmann/json.hpp"
5+
#include "server_start_cmd.h"
46
#include "trantor/utils/Logger.h"
7+
#include "utils/file_manager_utils.h"
58
#include "utils/logging_utils.h"
69

710
namespace commands {
@@ -10,7 +13,15 @@ ModelStartCmd::ModelStartCmd(std::string host, int port,
1013
: host_(std::move(host)), port_(port), mc_(mc) {}
1114

1215
bool ModelStartCmd::Exec() {
16+
// Check if server is started
17+
if (!commands::IsServerAlive(host_, port_)) {
18+
CLI_LOG("Server is not started yet, please run `"
19+
<< commands::GetCortexBinary() << " start` to start server!");
20+
return false;
21+
}
22+
1323
httplib::Client cli(host_ + ":" + std::to_string(port_));
24+
1425
nlohmann::json json_data;
1526
if (mc_.files.size() > 0) {
1627
// TODO(sang) support multiple files

engine/commands/run_cmd.cc

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,13 @@
22
#include "chat_cmd.h"
33
#include "cmd_info.h"
44
#include "config/yaml_config.h"
5+
#include "engine_install_cmd.h"
6+
#include "httplib.h"
7+
#include "model_pull_cmd.h"
58
#include "model_start_cmd.h"
9+
#include "server_start_cmd.h"
10+
#include "trantor/utils/Logger.h"
11+
#include "utils/cortex_utils.h"
612
#include "utils/file_manager_utils.h"
713

814
namespace commands {
@@ -15,7 +21,7 @@ void RunCmd::Exec() {
1521
// TODO should we clean all resource if something fails?
1622
// Check if model existed. If not, download it
1723
{
18-
auto model_conf = model_service_.GetDownloadedModel(model_id_);
24+
auto model_conf = model_service_.GetDownloadedModel(model_file + ".yaml");
1925
if (!model_conf.has_value()) {
2026
model_service_.DownloadModel(model_id_);
2127
}
@@ -35,6 +41,17 @@ void RunCmd::Exec() {
3541
}
3642
}
3743

44+
// Start server if it is not running
45+
{
46+
if (!commands::IsServerAlive(host_, port_)) {
47+
CLI_LOG("Starting server ...");
48+
commands::ServerStartCmd ssc;
49+
if(!ssc.Exec(host_, port_)) {
50+
return;
51+
}
52+
}
53+
}
54+
3855
// Start model
3956
config::YamlHandler yaml_handler;
4057
yaml_handler.ModelConfigFromFile(
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#include "server_start_cmd.h"
2+
#include "commands/cortex_upd_cmd.h"
3+
#include "httplib.h"
4+
#include "trantor/utils/Logger.h"
5+
#include "utils/cortex_utils.h"
6+
#include "utils/file_manager_utils.h"
7+
#include "utils/logging_utils.h"
8+
9+
namespace commands {
10+
11+
namespace {
12+
bool TryConnectToServer(const std::string& host, int port) {
13+
constexpr const auto kMaxRetry = 3u;
14+
auto count = 0u;
15+
// Check if server is started
16+
while (true) {
17+
if (IsServerAlive(host, port))
18+
break;
19+
// Wait for server up
20+
std::this_thread::sleep_for(std::chrono::seconds(1));
21+
if (count++ == kMaxRetry) {
22+
std::cerr << "Could not start server" << std::endl;
23+
return false;
24+
}
25+
}
26+
return true;
27+
}
28+
} // namespace
29+
30+
// The command holds no state; an explicitly defaulted constructor documents
// that intent better than a user-provided empty body.
ServerStartCmd::ServerStartCmd() = default;
31+
32+
bool ServerStartCmd::Exec(const std::string& host, int port) {
33+
#if defined(_WIN32) || defined(_WIN64)
34+
// Windows-specific code to create a new process
35+
STARTUPINFO si;
36+
PROCESS_INFORMATION pi;
37+
38+
ZeroMemory(&si, sizeof(si));
39+
si.cb = sizeof(si);
40+
ZeroMemory(&pi, sizeof(pi));
41+
auto exe = commands::GetCortexBinary();
42+
std::string cmds =
43+
cortex_utils::GetCurrentPath() + "/" + exe + " --start-server";
44+
// Create child process
45+
if (!CreateProcess(
46+
NULL, // No module name (use command line)
47+
const_cast<char*>(
48+
cmds.c_str()), // Command line (replace with your actual executable)
49+
NULL, // Process handle not inheritable
50+
NULL, // Thread handle not inheritable
51+
FALSE, // Set handle inheritance to FALSE
52+
0, // No creation flags
53+
NULL, // Use parent's environment block
54+
NULL, // Use parent's starting directory
55+
&si, // Pointer to STARTUPINFO structure
56+
&pi)) // Pointer to PROCESS_INFORMATION structure
57+
{
58+
std::cout << "Could not start server: " << GetLastError() << std::endl;
59+
return false;
60+
} else {
61+
if(!TryConnectToServer(host, port)) {
62+
return false;
63+
}
64+
std::cout << "Server started" << std::endl;
65+
}
66+
67+
#else
68+
// Unix-like system-specific code to fork a child process
69+
pid_t pid = fork();
70+
71+
if (pid < 0) {
72+
// Fork failed
73+
std::cerr << "Could not start server: " << std::endl;
74+
return false;
75+
} else if (pid == 0) {
76+
// No need to configure LD_LIBRARY_PATH for macOS
77+
#if !defined(__APPLE__) || !defined(__MACH__)
78+
const char* name = "LD_LIBRARY_PATH";
79+
auto data = getenv(name);
80+
std::string v;
81+
if (auto g = getenv(name); g) {
82+
v += g;
83+
}
84+
CTL_INF("LD_LIBRARY_PATH: " << v);
85+
auto data_path = file_manager_utils::GetCortexDataPath();
86+
auto llamacpp_path = data_path / "engines" / "cortex.llamacpp/";
87+
auto trt_path = data_path / "engines" / "cortex.tensorrt-llm/";
88+
auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v;
89+
setenv(name, new_v.c_str(), true);
90+
CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
91+
#endif
92+
auto exe = commands::GetCortexBinary();
93+
std::string p = cortex_utils::GetCurrentPath() + "/" + exe;
94+
execl(p.c_str(), exe.c_str(), "--start-server", (char*)0);
95+
} else {
96+
// Parent process
97+
if(!TryConnectToServer(host, port)) {
98+
return false;
99+
}
100+
std::cout << "Server started" << std::endl;
101+
}
102+
#endif
103+
return true;
104+
}
105+
106+
}; // namespace commands

engine/commands/server_start_cmd.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#pragma once
2+
#include <string>
3+
#include "httplib.h"
4+
5+
namespace commands {
6+
7+
inline bool IsServerAlive(const std::string& host, int port) {
8+
httplib::Client cli(host + ":" + std::to_string(port));
9+
auto res = cli.Get("/healthz");
10+
if (res && res->status == httplib::StatusCode::OK_200) {
11+
return true;
12+
}
13+
return false;
14+
}
15+
16+
// CLI command that launches the cortex API server as a background process
// and waits for it to become healthy.
class ServerStartCmd {
 public:
  ServerStartCmd();

  // Spawns the server binary with `--start-server` and polls its health
  // endpoint; returns true once the server responds, false if the process
  // could not be created or never came up.
  bool Exec(const std::string& host, int port);
};
21+
} // namespace commands

engine/controllers/command_line_parser.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "commands/model_start_cmd.h"
1414
#include "commands/model_stop_cmd.h"
1515
#include "commands/run_cmd.h"
16+
#include "commands/server_start_cmd.h"
1617
#include "commands/server_stop_cmd.h"
1718
#include "config/yaml_config.h"
1819
#include "services/engine_service.h"
@@ -174,6 +175,21 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
174175
});
175176
}
176177

178+
auto start_cmd = app_.add_subcommand("start", "Start the API server");
179+
int port = std::stoi(config.apiServerPort);
180+
start_cmd->add_option("-p, --port", port, "Server port to listen");
181+
start_cmd->callback([&config, &port] {
182+
if (port != stoi(config.apiServerPort)) {
183+
CTL_INF("apiServerPort changed from " << config.apiServerPort << " to "
184+
<< port);
185+
auto config_path = file_manager_utils::GetConfigurationPath();
186+
config.apiServerPort = std::to_string(port);
187+
config_yaml_utils::DumpYamlConfig(config, config_path.string());
188+
}
189+
commands::ServerStartCmd ssc;
190+
ssc.Exec(config.apiServerHost, std::stoi(config.apiServerPort));
191+
});
192+
177193
auto stop_cmd = app_.add_subcommand("stop", "Stop the API server");
178194

179195
stop_cmd->callback([&config] {
@@ -208,6 +224,10 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
208224
}
209225

210226
CLI11_PARSE(app_, argc, argv);
227+
if (argc == 1) {
228+
CLI_LOG(app_.help());
229+
return true;
230+
}
211231

212232
// Check new update, only check for stable release for now
213233
#ifdef CORTEX_CPP_VERSION

engine/e2e-test/test_runner.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,10 @@ def start_server() -> bool:
5050
def start_server_nix() -> bool:
5151
executable = getExecutablePath()
5252
process = subprocess.Popen(
53-
executable, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
53+
[executable] + ['start', '-p', '3928'],
54+
stdout=subprocess.PIPE,
55+
stderr=subprocess.PIPE,
56+
text=True
5457
)
5558

5659
start_time = time.time()
@@ -77,7 +80,7 @@ def start_server_nix() -> bool:
7780
def start_server_windows() -> bool:
7881
executable = getExecutablePath()
7982
process = subprocess.Popen(
80-
executable,
83+
[executable] + ['start', '-p', '3928'],
8184
stdout=subprocess.PIPE,
8285
stderr=subprocess.PIPE,
8386
text=True,

0 commit comments

Comments
 (0)