From 37f304548e3a3a4b87932e9b60ebec0cda7ce8df Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Sep 2024 11:40:30 +0700 Subject: [PATCH 01/11] temp --- engine/controllers/command_line_parser.cc | 3 + engine/services/engine_service.cc | 85 ++++++++++++++++++++++- engine/services/engine_service.h | 2 + 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 9bee6b3fe..f174b942f 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -245,6 +245,9 @@ void CommandLineParser::EngineInstall(CLI::App* parent, install_engine_cmd->add_option("-v, --version", version, "Engine version to download"); + std::string source; + install_engine_cmd->add_option("-s, --source", source, + "Download engine by local path or remote url"); install_engine_cmd->callback([engine_name, &version] { try { diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 68368fe4a..a6de4a607 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -109,7 +109,7 @@ void EngineService::InstallEngine(const std::string& engine, body = get_data(body); } if (body.empty()) { - throw std::runtime_error("No release found for " + version); + throw std::runtime_error("No release found for " + version); } auto assets = body["assets"]; @@ -282,6 +282,89 @@ void EngineService::InstallEngine(const std::string& engine, } } +void EngineService::InstallLocalEngine(const std::string& engine, + const std::string& path) { + auto system_info = system_info_utils::GetSystemInfo(); + std::string cuda_variant = ""; + // Get CPU, GPU info + auto cuda_driver_version = system_info_utils::GetCudaVersion(); + CTL_INF("engine: " << engine); + CTL_INF("CUDA version: " << cuda_driver_version); + std::string matched_variant = ""; + if (engine == "cortex.tensorrt-llm") { + matched_variant = engine_matcher_utils::ValidateTensorrtLlm( + variants, system_info.os, cuda_driver_version); + } else if (engine == "cortex.onnx") { + matched_variant = engine_matcher_utils::ValidateOnnx( + variants, system_info.os, system_info.arch); + } else if (engine == "cortex.llamacpp") { + cortex::cpuid::CpuInfo cpu_info; + auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant(cpu_info); + matched_variant = engine_matcher_utils::Validate( + variants, system_info.os, system_info.arch, suitable_avx, + cuda_driver_version); + } + CTL_INF("Matched variant: " << matched_variant); + if (matched_variant.empty()) { + CTL_ERR("No variant found for " << os_arch); + throw std::runtime_error("No variant found for " + os_arch); + } + + if (engine == "cortex.tensorrt-llm") { + // for tensorrt-llm, we need to download cuda toolkit v12.4 + cuda_variant = "cuda-12.4.tar.gz"; + } else { + // llamacpp + auto cuda_driver_semver = + semantic_version_utils::SplitVersion(cuda_driver_version); + cuda_variant = "cuda-" + std::to_string(cuda_driver_semver.major) + "." + + std::to_string(cuda_driver_semver.minor) + "tar.gz"; + } + + CTL_INF("Matched cuda variant: " << cuda_variant); + if (cuda_variant.empty()) { + CTL_ERR("No cuda variant found for " << engine); + throw std::runtime_error("No cuda variant found for " + engine); + } + + // Check if folder exists + bool found_engine = false; + bool found_cuda = false; + if (std::filesystem::exists(path) && std::filesystem::is_directory(path)) { + for (const auto& entry : std::filesystem::directory_iterator(models_path)) { + if (entry.is_regular_file() && entry.path().extension() == ".tar.gz") { + // Check if match engine binary + if (std::string filename = entry.path().stem().string(); + filename == matched_variant) { + found_engine = true; + // extract binary + auto engine_path = + file_manager_utils::GetEnginesContainerPath() / engine; + archive_utils::ExtractArchive(path + "/" + filename, + engine_path.string()); + + } else if (std::string cf = entry.path().stem().string(); + cf == cuda_variant) { + found_cuda = true; + // extract binary + auto engine_path = + file_manager_utils::GetEnginesContainerPath() / engine; + archive_utils::ExtractArchive(path + "/" + cf, + engine_path.string()); + } + } + } + } else { + // Folder does not exist, throw exception + } + + // Not match, download from remote + if (!found_engine) {} + + // Not match any cuda binary, download from remote + if (!found_cuda) {} +} + void EngineService::UninstallEngine(const std::string& engine) { // TODO: Unload the model which is currently running on engine_ diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 442923356..1f4daa701 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -30,5 +30,7 @@ class EngineService { void InstallEngine(const std::string& engine, const std::string& version = "latest"); + void InstallLocalEngine(const std::string& engine, const std::string& path); + void UninstallEngine(const std::string& engine); }; From 0daa8f4277b584a92f7da891506866679e3b4694 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Sep 2024 14:17:45 +0700 Subject: [PATCH 02/11] feat: next --- engine/commands/engine_install_cmd.cc | 5 +- engine/commands/engine_install_cmd.h | 3 +- engine/controllers/command_line_parser.cc | 14 ++-- engine/controllers/command_line_parser.h | 2 +- engine/services/engine_service.cc | 90 ++++++++++------------- engine/services/engine_service.h | 3 +- 6 files changed, 53 insertions(+), 64 deletions(-) diff --git a/engine/commands/engine_install_cmd.cc b/engine/commands/engine_install_cmd.cc index 36f7a040b..59d51bfd1 100644 --- a/engine/commands/engine_install_cmd.cc +++ b/engine/commands/engine_install_cmd.cc @@ -10,8 +10,9 @@ namespace commands { void EngineInstallCmd::Exec(const std::string& engine, - const std::string& version) { - engine_service_.InstallEngine(engine, version); + const std::string& version, + const std::string& src) { + engine_service_.InstallEngine(engine, version, src); CLI_LOG("Engine " << engine << " installed successfully!"); } }; // namespace commands diff --git a/engine/commands/engine_install_cmd.h b/engine/commands/engine_install_cmd.h index c6ba6f135..32b7079cc 100644 --- a/engine/commands/engine_install_cmd.h +++ b/engine/commands/engine_install_cmd.h @@ -9,7 +9,8 @@ class EngineInstallCmd { public: explicit EngineInstallCmd() : engine_service_{EngineService()} {}; - void Exec(const std::string& engine, const std::string& version = "latest"); + void Exec(const std::string& engine, const std::string& version = "latest", + const std::string& src = ""); private: EngineService engine_service_; diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 6c38d3ca3..4c55fcefb 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -157,6 +157,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { // Default version is latest std::string version{"latest"}; + std::string src; // engines group commands auto engines_cmd = app_.add_subcommand("engines", "Subcommands for managing engines"); @@ -174,7 +175,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { install_cmd->require_subcommand(); for (auto& engine : engine_service_.kSupportEngines) { std::string engine_name{engine}; - EngineInstall(install_cmd, engine_name, version); + EngineInstall(install_cmd, engine_name, version, src); } auto uninstall_cmd = @@ -257,18 +258,19 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { void CommandLineParser::EngineInstall(CLI::App* parent, const std::string& engine_name, - std::string& version) { + std::string& version, + std::string& src) { auto install_engine_cmd = parent->add_subcommand(engine_name, ""); install_engine_cmd->add_option("-v, --version", version, "Engine version to download"); - std::string source; - install_engine_cmd->add_option("-s, --source", source, + + install_engine_cmd->add_option("-s, --source", src, "Download engine by local path or remote url"); - install_engine_cmd->callback([engine_name, &version] { + install_engine_cmd->callback([engine_name, &version, &src] { try { - commands::EngineInstallCmd().Exec(engine_name, version); + commands::EngineInstallCmd().Exec(engine_name, version, src); } catch (const std::exception& e) { CTL_ERR(e.what()); } diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h index e4a2f47c5..eb73cb04d 100644 --- a/engine/controllers/command_line_parser.h +++ b/engine/controllers/command_line_parser.h @@ -10,7 +10,7 @@ class CommandLineParser { private: void EngineInstall(CLI::App* parent, const std::string& engine_name, - std::string& version); + std::string& version, std::string& src); void EngineUninstall(CLI::App* parent, const std::string& engine_name); diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index a6de4a607..72ec7e4d5 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -74,7 +74,11 @@ std::vector EngineService::GetEngineInfoList() const { } void EngineService::InstallEngine(const std::string& engine, - const std::string& version) { + const std::string& version, + const std::string& src) { + + InstallLocalEngine(engine, src); + return; auto system_info = system_info_utils::GetSystemInfo(); auto get_params = [&engine, &version]() -> std::vector { if (version == "latest") { @@ -284,6 +288,7 @@ void EngineService::InstallEngine(const std::string& engine, void EngineService::InstallLocalEngine(const std::string& engine, const std::string& path) { + bool found_cuda = false; auto system_info = system_info_utils::GetSystemInfo(); std::string cuda_variant = ""; // Get CPU, GPU info @@ -291,6 +296,29 @@ void EngineService::InstallLocalEngine(const std::string& engine, CTL_INF("engine: " << engine); CTL_INF("CUDA version: " << cuda_driver_version); std::string matched_variant = ""; + + std::vector variants; + if (std::filesystem::exists(path) && std::filesystem::is_directory(path)) { + for (const auto& entry : std::filesystem::directory_iterator(path)) { + CTL_INF("file path: " << entry.path().string()); + if (entry.is_regular_file() && (entry.path().extension() == ".tar.gz" || + entry.path().extension() == ".gz")) { + CTL_INF("file name: " << entry.path().filename().string()); + variants.push_back(entry.path().filename().string()); + if (std::string cf = entry.path().stem().string(); cf == cuda_variant) { + found_cuda = true; + // extract binary + auto engine_path = + file_manager_utils::GetEnginesContainerPath() / engine; + archive_utils::ExtractArchive(path + "/" + cf, engine_path.string()); + } + } + } + } else { + // Folder does not exist, throw exception + CTL_ERR("Folder does not exist: " << path); + } + if (engine == "cortex.tensorrt-llm") { matched_variant = engine_matcher_utils::ValidateTensorrtLlm( variants, system_info.os, cuda_driver_version); @@ -304,62 +332,18 @@ void EngineService::InstallLocalEngine(const std::string& engine, variants, system_info.os, system_info.arch, suitable_avx, cuda_driver_version); } + CTL_INF("Matched variant: " << matched_variant); if (matched_variant.empty()) { - CTL_ERR("No variant found for " << os_arch); - throw std::runtime_error("No variant found for " + os_arch); - } - - if (engine == "cortex.tensorrt-llm") { - // for tensorrt-llm, we need to download cuda toolkit v12.4 - cuda_variant = "cuda-12.4.tar.gz"; + CTL_INF("No variant found for " << system_info.os << "-" + << system_info.arch); + // Go with the remote flow } else { - // llamacpp - auto cuda_driver_semver = - semantic_version_utils::SplitVersion(cuda_driver_version); - cuda_variant = "cuda-" + std::to_string(cuda_driver_semver.major) + "." + - std::to_string(cuda_driver_semver.minor) + "tar.gz"; + auto engine_path = file_manager_utils::GetEnginesContainerPath(); + archive_utils::ExtractArchive(path + "/" + matched_variant, + engine_path.string()); } - - CTL_INF("Matched cuda variant: " << cuda_variant); - if (cuda_variant.empty()) { - CTL_ERR("No cuda variant found for " << engine); - throw std::runtime_error("No cuda variant found for " + engine); - } - - // Check if folder exists - bool found_engine = false; - bool found_cuda = false; - if (std::filesystem::exists(path) && std::filesystem::is_directory(path)) { - for (const auto& entry : std::filesystem::directory_iterator(models_path)) { - if (entry.is_regular_file() && entry.path().extension() == ".tar.gz") { - // Check if match engine binary - if (std::string filename = entry.path().stem().string(); - filename == matched_variant) { - found_engine = true; - // extract binary - auto engine_path = - file_manager_utils::GetEnginesContainerPath() / engine; - archive_utils::ExtractArchive(path + "/" + filename, - engine_path.string()); - - } else if (std::string cf = entry.path().stem().string(); - cf == cuda_variant) { - found_cuda = true; - // extract binary - auto engine_path = - file_manager_utils::GetEnginesContainerPath() / engine; - archive_utils::ExtractArchive(path + "/" + cf, - engine_path.string()); - } - } - } - } else { - // Folder does not exist, throw exception - } - - // Not match, download from remote - if (!found_engine) {} + return; // Not match any cuda binary, download from remote if (!found_cuda) {} diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 1f4daa701..0a03808a0 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -28,7 +28,8 @@ class EngineService { std::vector GetEngineInfoList() const; void InstallEngine(const std::string& engine, - const std::string& version = "latest"); + const std::string& version = "latest", + const std::string& src = ""); void InstallLocalEngine(const std::string& engine, const std::string& path); From 02d5f849139d19c5055c51d6b74f1853cba3a866 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Sep 2024 15:10:22 +0700 Subject: [PATCH 03/11] feat: more --- engine/services/engine_service.cc | 325 ++++++++++++++++-------------- engine/services/engine_service.h | 9 +- 2 files changed, 180 insertions(+), 154 deletions(-) diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 72ec7e4d5..e3ac8f663 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -77,8 +77,108 @@ void EngineService::InstallEngine(const std::string& engine, const std::string& version, const std::string& src) { - InstallLocalEngine(engine, src); - return; + if (!src.empty()) { + UnzipEngine(engine, version, src); + } else { + DownloadEngine(engine, version); + DownloadCuda(engine); + } +} + +void EngineService::UnzipEngine(const std::string& engine, + const std::string& version, + const std::string& path) { + bool found_cuda = false; + auto system_info = system_info_utils::GetSystemInfo(); + + // Get CPU, GPU info + auto cuda_driver_version = system_info_utils::GetCudaVersion(); + CTL_INF("engine: " << engine); + CTL_INF("CUDA version: " << cuda_driver_version); + std::string matched_variant = ""; + std::string cuda_variant = "cuda-"; + cuda_variant += cuda_driver_version; + cuda_variant += ".tar.gz"; + std::vector variants; + if (std::filesystem::exists(path) && std::filesystem::is_directory(path)) { + for (const auto& entry : std::filesystem::directory_iterator(path)) { + CTL_INF("file path: " << entry.path().string()); + if (entry.is_regular_file() && (entry.path().extension() == ".tar.gz" || + entry.path().extension() == ".gz")) { + CTL_INF("file name: " << entry.path().filename().string()); + variants.push_back(entry.path().filename().string()); + if (std::string cf = entry.path().filename().string(); + cf == cuda_variant) { + CTL_INF("Found cuda variant, extract it"); + found_cuda = true; + // extract binary + auto engine_path = + file_manager_utils::GetEnginesContainerPath() / engine; + archive_utils::ExtractArchive(path + "/" + cf, engine_path.string()); + } + } + } + } else { + // Folder does not exist, throw exception + CTL_ERR("Folder does not exist: " << path); + return; + } + + if (engine == "cortex.tensorrt-llm") { + matched_variant = engine_matcher_utils::ValidateTensorrtLlm( + variants, system_info.os, cuda_driver_version); + } else if (engine == "cortex.onnx") { + matched_variant = engine_matcher_utils::ValidateOnnx( + variants, system_info.os, system_info.arch); + } else if (engine == "cortex.llamacpp") { + cortex::cpuid::CpuInfo cpu_info; + auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant(cpu_info); + matched_variant = engine_matcher_utils::Validate( + variants, system_info.os, system_info.arch, suitable_avx, + cuda_driver_version); + } + + CTL_INF("Matched variant: " << matched_variant); + if (matched_variant.empty()) { + CTL_INF("No variant found for " << system_info.os << "-" << system_info.arch + << ", will get engine from remote"); + // Go with the remote flow + DownloadEngine(engine, version); + } else { + auto engine_path = file_manager_utils::GetEnginesContainerPath(); + archive_utils::ExtractArchive(path + "/" + matched_variant, + engine_path.string()); + } + + // Not match any cuda binary, download from remote + if (!found_cuda) { + DownloadCuda(engine); + } +} + +void EngineService::UninstallEngine(const std::string& engine) { + // TODO: Unload the model which is currently running on engine_ + + // TODO: Unload engine if is loaded + + auto ecp = file_manager_utils::GetEnginesContainerPath(); + auto engine_path = ecp / engine; + + if (!std::filesystem::exists(engine_path)) { + throw std::runtime_error("Engine " + engine + " is not installed!"); + } + + try { + std::filesystem::remove_all(engine_path); + CTL_INF("Engine " << engine << " uninstalled successfully!"); + } catch (const std::exception& e) { + CTL_ERR("Failed to uninstall engine " << engine << ": " << e.what()); + throw; + } +} + +void EngineService::DownloadEngine(const std::string& engine, + const std::string& version) { auto system_info = system_info_utils::GetSystemInfo(); auto get_params = [&engine, &version]() -> std::vector { if (version == "latest") { @@ -199,85 +299,7 @@ void EngineService::InstallEngine(const std::string& engine, } CTL_INF("Finished!"); }); - if (system_info.os == "mac" || engine == "cortex.onnx") { - // mac and onnx engine does not require cuda toolkit - return; - } - - if (cuda_driver_version.empty()) { - CTL_WRN("No cuda driver, continue with CPU"); - return; - } - // download cuda toolkit - const std::string jan_host = "https://catalog.jan.ai"; - const std::string cuda_toolkit_file_name = "cuda.tar.gz"; - const std::string download_id = "cuda"; - - // TODO: we don't have API to retrieve list of cuda toolkit dependencies atm because we hosting it at jan - // will have better logic after https://github.com/janhq/cortex/issues/1046 finished - // for now, assume that we have only 11.7 and 12.4 - auto suitable_toolkit_version = ""; - if (engine == "cortex.tensorrt-llm") { - // for tensorrt-llm, we need to download cuda toolkit v12.4 - suitable_toolkit_version = "12.4"; - } else { - // llamacpp - auto cuda_driver_semver = - semantic_version_utils::SplitVersion(cuda_driver_version); - if (cuda_driver_semver.major == 11) { - suitable_toolkit_version = "11.7"; - } else if (cuda_driver_semver.major == 12) { - suitable_toolkit_version = "12.4"; - } - } - - // compare cuda driver version with cuda toolkit version - // cuda driver version should be greater than toolkit version to ensure compatibility - if (semantic_version_utils::CompareSemanticVersion( - cuda_driver_version, suitable_toolkit_version) < 0) { - CTL_ERR("Your Cuda driver version " - << cuda_driver_version - << " is not compatible with cuda toolkit version " - << suitable_toolkit_version); - throw std::runtime_error( - "Cuda driver is not compatible with cuda toolkit"); - } - - std::ostringstream cuda_toolkit_url; - cuda_toolkit_url << jan_host << "/" << "dist/cuda-dependencies/" - << cuda_driver_version << "/" << system_info.os << "/" - << cuda_toolkit_file_name; - - LOG_DEBUG << "Cuda toolkit download url: " << cuda_toolkit_url.str(); - auto cuda_toolkit_local_path = - file_manager_utils::GetContainerFolderPath( - file_manager_utils::DownloadTypeToString( - DownloadType::CudaToolkit)) / - cuda_toolkit_file_name; - LOG_DEBUG << "Download to: " << cuda_toolkit_local_path.string(); - auto downloadCudaToolkitTask{DownloadTask{ - .id = download_id, - .type = DownloadType::CudaToolkit, - .items = {DownloadItem{.id = download_id, - .downloadUrl = cuda_toolkit_url.str(), - .localPath = cuda_toolkit_local_path}}, - }}; - - download_service.AddDownloadTask( - downloadCudaToolkitTask, [&](const DownloadTask& finishedTask) { - auto engine_path = - file_manager_utils::GetEnginesContainerPath() / engine; - archive_utils::ExtractArchive( - finishedTask.items[0].localPath.string(), - engine_path.string()); - - try { - std::filesystem::remove(finishedTask.items[0].localPath); - } catch (std::exception& e) { - CTL_ERR("Error removing downloaded file: " << e.what()); - } - }); return; } } @@ -286,86 +308,83 @@ void EngineService::InstallEngine(const std::string& engine, } } -void EngineService::InstallLocalEngine(const std::string& engine, - const std::string& path) { - bool found_cuda = false; +void EngineService::DownloadCuda(const std::string& engine) { auto system_info = system_info_utils::GetSystemInfo(); - std::string cuda_variant = ""; - // Get CPU, GPU info auto cuda_driver_version = system_info_utils::GetCudaVersion(); - CTL_INF("engine: " << engine); - CTL_INF("CUDA version: " << cuda_driver_version); - std::string matched_variant = ""; - - std::vector variants; - if (std::filesystem::exists(path) && std::filesystem::is_directory(path)) { - for (const auto& entry : std::filesystem::directory_iterator(path)) { - CTL_INF("file path: " << entry.path().string()); - if (entry.is_regular_file() && (entry.path().extension() == ".tar.gz" || - entry.path().extension() == ".gz")) { - CTL_INF("file name: " << entry.path().filename().string()); - variants.push_back(entry.path().filename().string()); - if (std::string cf = entry.path().stem().string(); cf == cuda_variant) { - found_cuda = true; - // extract binary - auto engine_path = - file_manager_utils::GetEnginesContainerPath() / engine; - archive_utils::ExtractArchive(path + "/" + cf, engine_path.string()); - } - } - } - } else { - // Folder does not exist, throw exception - CTL_ERR("Folder does not exist: " << path); + if (system_info.os == "mac" || engine == "cortex.onnx") { + // mac and onnx engine does not require cuda toolkit + return; } - if (engine == "cortex.tensorrt-llm") { - matched_variant = engine_matcher_utils::ValidateTensorrtLlm( - variants, system_info.os, cuda_driver_version); - } else if (engine == "cortex.onnx") { - matched_variant = engine_matcher_utils::ValidateOnnx( - variants, system_info.os, system_info.arch); - } else if (engine == "cortex.llamacpp") { - cortex::cpuid::CpuInfo cpu_info; - auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant(cpu_info); - matched_variant = engine_matcher_utils::Validate( - variants, system_info.os, system_info.arch, suitable_avx, - cuda_driver_version); + if (cuda_driver_version.empty()) { + CTL_WRN("No cuda driver, continue with CPU"); + return; } - - CTL_INF("Matched variant: " << matched_variant); - if (matched_variant.empty()) { - CTL_INF("No variant found for " << system_info.os << "-" - << system_info.arch); - // Go with the remote flow + // download cuda toolkit + const std::string jan_host = "https://catalog.jan.ai"; + const std::string cuda_toolkit_file_name = "cuda.tar.gz"; + const std::string download_id = "cuda"; + + // TODO: we don't have API to retrieve list of cuda toolkit dependencies atm because we hosting it at jan + // will have better logic after https://github.com/janhq/cortex/issues/1046 finished + // for now, assume that we have only 11.7 and 12.4 + auto suitable_toolkit_version = ""; + if (engine == "cortex.tensorrt-llm") { + // for tensorrt-llm, we need to download cuda toolkit v12.4 + suitable_toolkit_version = "12.4"; } else { - auto engine_path = file_manager_utils::GetEnginesContainerPath(); - archive_utils::ExtractArchive(path + "/" + matched_variant, - engine_path.string()); + // llamacpp + auto cuda_driver_semver = + semantic_version_utils::SplitVersion(cuda_driver_version); + if (cuda_driver_semver.major == 11) { + suitable_toolkit_version = "11.7"; + } else if (cuda_driver_semver.major == 12) { + suitable_toolkit_version = "12.4"; + } } - return; - - // Not match any cuda binary, download from remote - if (!found_cuda) {} -} - -void EngineService::UninstallEngine(const std::string& engine) { - // TODO: Unload the model which is currently running on engine_ - // TODO: Unload engine if is loaded - - auto ecp = file_manager_utils::GetEnginesContainerPath(); - auto engine_path = ecp / engine; - - if (!std::filesystem::exists(engine_path)) { - throw std::runtime_error("Engine " + engine + " is not installed!"); + // compare cuda driver version with cuda toolkit version + // cuda driver version should be greater than toolkit version to ensure compatibility + if (semantic_version_utils::CompareSemanticVersion( + cuda_driver_version, suitable_toolkit_version) < 0) { + CTL_ERR("Your Cuda driver version " + << cuda_driver_version + << " is not compatible with cuda toolkit version " + << suitable_toolkit_version); + throw std::runtime_error("Cuda driver is not compatible with cuda toolkit"); } - try { - std::filesystem::remove_all(engine_path); - CTL_INF("Engine " << engine << " uninstalled successfully!"); - } catch (const std::exception& e) { - CTL_ERR("Failed to uninstall engine " << engine << ": " << e.what()); - throw; - } -} + std::ostringstream cuda_toolkit_url; + cuda_toolkit_url << jan_host << "/" << "dist/cuda-dependencies/" + << cuda_driver_version << "/" << system_info.os << "/" + << cuda_toolkit_file_name; + + LOG_DEBUG << "Cuda toolkit download url: " << cuda_toolkit_url.str(); + auto cuda_toolkit_local_path = + file_manager_utils::GetContainerFolderPath( + file_manager_utils::DownloadTypeToString(DownloadType::CudaToolkit)) / + cuda_toolkit_file_name; + LOG_DEBUG << "Download to: " << cuda_toolkit_local_path.string(); + auto downloadCudaToolkitTask{DownloadTask{ + .id = download_id, + .type = DownloadType::CudaToolkit, + .items = {DownloadItem{.id = download_id, + .downloadUrl = cuda_toolkit_url.str(), + .localPath = cuda_toolkit_local_path}}, + }}; + + DownloadService download_service; + download_service.AddDownloadTask( + downloadCudaToolkitTask, [&](const DownloadTask& finishedTask) { + auto engine_path = + file_manager_utils::GetEnginesContainerPath() / engine; + archive_utils::ExtractArchive(finishedTask.items[0].localPath.string(), + engine_path.string()); + + try { + std::filesystem::remove(finishedTask.items[0].localPath); + } catch (std::exception& e) { + CTL_ERR("Error removing downloaded file: " << e.what()); + } + }); +} \ No newline at end of file diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 0a03808a0..a86ade226 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -31,7 +31,14 @@ class EngineService { const std::string& version = "latest", const std::string& src = ""); - void InstallLocalEngine(const std::string& engine, const std::string& path); + void UnzipEngine(const std::string& engine, + const std::string& version, + const std::string& path); void UninstallEngine(const std::string& engine); + + private: + void DownloadEngine(const std::string& engine, + const std::string& version = "latest"); + void DownloadCuda(const std::string& engine); }; From 71da2af94e6320860f37f6bb448db4588ca63669 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Sep 2024 16:28:28 +0700 Subject: [PATCH 04/11] fix: add e2e tests --- engine/e2e-test/test_cli_engine_install.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/engine/e2e-test/test_cli_engine_install.py b/engine/e2e-test/test_cli_engine_install.py index 89d49401d..44aab80e2 100644 --- a/engine/e2e-test/test_cli_engine_install.py +++ b/engine/e2e-test/test_cli_engine_install.py @@ -1,4 +1,5 @@ import platform +import tempfile import pytest from test_runner import run @@ -36,3 +37,16 @@ def test_engines_install_pre_release_llamacpp(self): assert "Start downloading" in output, "Should display downloading message" assert exit_code == 0, f"Install engine failed with error: {error}" + def test_engines_should_fallback_to_download_llamacpp_engine_if_not_exists(self): + exit_code, output, error = run( + "Install Engine", ["engines", "install", "cortex.llamacpp", "-s", tempfile.gettempdir()], timeout=None + ) + assert "Start downloading" in output, "Should display downloading message" + assert exit_code == 0, f"Install engine failed with error: {error}" + + def test_engines_should_not_perform_with_dummy_path(self): + exit_code, output, error = run( + "Install Engine", ["engines", "install", "cortex.llamacpp", "-s", "abcpod"], timeout=None + ) + assert "Folder does not exist" in output, "Should display error" + assert exit_code == 0, f"Install engine failed with error: {error}" From 860695e60d71651751001c0e9fd0bcf36bfab0d3 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Sat, 21 Sep 2024 14:22:09 +0700 Subject: [PATCH 05/11] fix: refactor --- engine/commands/cortex_upd_cmd.cc | 12 ++-- engine/controllers/engines.cc | 2 +- engine/main.cc | 8 +-- engine/services/engine_service.cc | 98 +++++++++++++++---------------- engine/services/engine_service.h | 27 +++++++-- engine/utils/system_info_utils.h | 5 +- 6 files changed, 83 insertions(+), 69 deletions(-) diff --git a/engine/commands/cortex_upd_cmd.cc b/engine/commands/cortex_upd_cmd.cc index 3c892f6fc..e68a286fc 100644 --- a/engine/commands/cortex_upd_cmd.cc +++ b/engine/commands/cortex_upd_cmd.cc @@ -38,7 +38,7 @@ void CortexUpdCmd::Exec(std::string v) { bool CortexUpdCmd::GetStable(const std::string& v) { auto system_info = system_info_utils::GetSystemInfo(); - CTL_INF("OS: " << system_info.os << ", Arch: " << system_info.arch); + CTL_INF("OS: " << system_info->os << ", Arch: " << system_info->arch); // Download file auto github_host = GetHostName(); @@ -56,7 +56,7 @@ bool CortexUpdCmd::GetStable(const std::string& v) { } if (!HandleGithubRelease(json_data["assets"], - {system_info.os + "-" + system_info.arch})) { + {system_info->os + "-" + system_info->arch})) { return false; } } catch (const nlohmann::json::parse_error& e) { @@ -83,7 +83,7 @@ bool CortexUpdCmd::GetStable(const std::string& v) { bool CortexUpdCmd::GetBeta(const std::string& v) { auto system_info = system_info_utils::GetSystemInfo(); - CTL_INF("OS: " << system_info.os << ", Arch: " << system_info.arch); + CTL_INF("OS: " << system_info->os << ", Arch: " << system_info->arch); // Download file auto github_host = GetHostName(); @@ -113,7 +113,7 @@ bool CortexUpdCmd::GetBeta(const std::string& v) { } if (!HandleGithubRelease(json_data["assets"], - {system_info.os + "-" + system_info.arch})) { + {system_info->os + "-" + system_info->arch})) { return false; } } catch (const nlohmann::json::parse_error& e) { @@ -205,11 +205,11 @@ bool CortexUpdCmd::HandleGithubRelease(const nlohmann::json& assets, bool CortexUpdCmd::GetNightly(const std::string& v) { auto system_info = system_info_utils::GetSystemInfo(); - CTL_INF("OS: " << system_info.os << ", Arch: " << system_info.arch); + CTL_INF("OS: " << system_info->os << ", Arch: " << system_info->arch); // Download file std::string version = v.empty() ? "latest" : std::move(v); - std::string os_arch{system_info.os + "-" + system_info.arch}; + std::string os_arch{system_info->os + "-" + system_info->arch}; const char* paths[] = { "cortex", version.c_str(), diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index e35002e1f..1c1466e5e 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -38,7 +38,7 @@ void Engines::InstallEngine( auto jsonResponse = json::parse(res->body); auto assets = jsonResponse["assets"]; - auto os_arch{system_info.os + "-" + system_info.arch}; + auto os_arch{system_info->os + "-" + system_info->arch}; for (auto& asset : assets) { auto assetName = asset["name"].get(); if (assetName.find(os_arch) != std::string::npos) { diff --git a/engine/main.cc b/engine/main.cc index bdac8148c..e7fe9bd22 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -88,10 +88,10 @@ void RunServer() { int main(int argc, char* argv[]) { // Stop the program if the system is not supported auto system_info = system_info_utils::GetSystemInfo(); - if (system_info.arch == system_info_utils::kUnsupported || - system_info.os == system_info_utils::kUnsupported) { - CTL_ERR("Unsupported OS or architecture: " << system_info.os << ", " - << system_info.arch); + if (system_info->arch == system_info_utils::kUnsupported || + system_info->os == system_info_utils::kUnsupported) { + CTL_ERR("Unsupported OS or architecture: " << system_info->os << ", " + << system_info->arch); return 1; } diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index e3ac8f663..2a36050f7 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -12,6 +12,11 @@ using json = nlohmann::json; +EngineService::EngineService() + : hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(), + .cuda_driver_version = system_info_utils::GetCudaVersion()} {} +EngineService::~EngineService() {} + std::optional EngineService::GetEngineInfo( const std::string& engine) const { // if engine is not found in kSupportEngine, throw runtime error @@ -86,20 +91,20 @@ void EngineService::InstallEngine(const std::string& engine, } void EngineService::UnzipEngine(const std::string& engine, - const std::string& version, - const std::string& path) { + const std::string& version, + const std::string& path) { bool found_cuda = false; - auto system_info = system_info_utils::GetSystemInfo(); - // Get CPU, GPU info - auto cuda_driver_version = system_info_utils::GetCudaVersion(); CTL_INF("engine: " << engine); - CTL_INF("CUDA version: " << cuda_driver_version); - std::string matched_variant = ""; + CTL_INF("CUDA version: " << hw_inf_.cuda_driver_version); std::string cuda_variant = "cuda-"; - cuda_variant += cuda_driver_version; + cuda_variant += hw_inf_.cuda_driver_version; cuda_variant += ".tar.gz"; + std::vector variants; + // Loop through all files in the directory + // 1. Push all engine variants to a list + // 2. If cuda version is matched, extract it if (std::filesystem::exists(path) && std::filesystem::is_directory(path)) { for (const auto& entry : std::filesystem::directory_iterator(path)) { CTL_INF("file path: " << entry.path().string()); @@ -124,23 +129,11 @@ void EngineService::UnzipEngine(const std::string& engine, return; } - if (engine == "cortex.tensorrt-llm") { - matched_variant = engine_matcher_utils::ValidateTensorrtLlm( - variants, system_info.os, cuda_driver_version); - } else if (engine == "cortex.onnx") { - matched_variant = engine_matcher_utils::ValidateOnnx( - variants, system_info.os, system_info.arch); - } else if (engine == "cortex.llamacpp") { - cortex::cpuid::CpuInfo cpu_info; - auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant(cpu_info); - matched_variant = engine_matcher_utils::Validate( - variants, system_info.os, system_info.arch, suitable_avx, - cuda_driver_version); - } - + auto matched_variant = GetMatchedVariant(engine, variants); CTL_INF("Matched variant: " << matched_variant); if (matched_variant.empty()) { - CTL_INF("No variant found for " << system_info.os << "-" << system_info.arch + CTL_INF("No variant found for " << hw_inf_.sys_inf->os << "-" + << hw_inf_.sys_inf->arch << ", will get engine from remote"); // Go with the remote flow DownloadEngine(engine, version); @@ -178,8 +171,7 @@ void EngineService::UninstallEngine(const std::string& engine) { } void EngineService::DownloadEngine(const std::string& engine, - const std::string& version) { - auto system_info = system_info_utils::GetSystemInfo(); + const std::string& version) { auto get_params = [&engine, &version]() -> std::vector { if (version == "latest") { return {"repos", "janhq", engine, "releases", version}; @@ -217,7 +209,7 @@ void EngineService::DownloadEngine(const std::string& engine, } auto assets = body["assets"]; - auto os_arch{system_info.os + "-" + system_info.arch}; + auto os_arch{hw_inf_.sys_inf->os + "-" + hw_inf_.sys_inf->arch}; std::vector variants; for (auto& asset : assets) { @@ -225,24 +217,9 @@ void EngineService::DownloadEngine(const std::string& engine, variants.push_back(asset_name); } - auto cuda_driver_version = system_info_utils::GetCudaVersion(); CTL_INF("engine: " << engine); - CTL_INF("CUDA version: " << cuda_driver_version); - std::string matched_variant = ""; - - if (engine == "cortex.tensorrt-llm") { - matched_variant = engine_matcher_utils::ValidateTensorrtLlm( - variants, system_info.os, cuda_driver_version); - } else if (engine == "cortex.onnx") { - matched_variant = engine_matcher_utils::ValidateOnnx( - variants, system_info.os, system_info.arch); - } else if (engine == "cortex.llamacpp") { - cortex::cpuid::CpuInfo cpu_info; - auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant(cpu_info); - matched_variant = engine_matcher_utils::Validate( - variants, system_info.os, system_info.arch, suitable_avx, - cuda_driver_version); - } + CTL_INF("CUDA version: " << hw_inf_.cuda_driver_version); + auto matched_variant = GetMatchedVariant(engine, variants); CTL_INF("Matched variant: " << matched_variant); if (matched_variant.empty()) { CTL_ERR("No variant found for " << os_arch); @@ -309,14 +286,12 @@ void EngineService::DownloadEngine(const std::string& engine, } void EngineService::DownloadCuda(const std::string& engine) { - auto system_info = system_info_utils::GetSystemInfo(); - auto cuda_driver_version = system_info_utils::GetCudaVersion(); - if (system_info.os == "mac" || engine == "cortex.onnx") { + if (hw_inf_.sys_inf->os == "mac" || engine == "cortex.onnx") { // mac and onnx engine does not require cuda toolkit return; } - if (cuda_driver_version.empty()) { + if (hw_inf_.cuda_driver_version.empty()) { CTL_WRN("No cuda driver, continue with CPU"); return; } @@ -335,7 +310,7 @@ void EngineService::DownloadCuda(const std::string& engine) { } else { // llamacpp auto cuda_driver_semver = - semantic_version_utils::SplitVersion(cuda_driver_version); + semantic_version_utils::SplitVersion(hw_inf_.cuda_driver_version); if (cuda_driver_semver.major == 11) { suitable_toolkit_version = "11.7"; } else if (cuda_driver_semver.major == 12) { @@ -346,9 +321,9 @@ void EngineService::DownloadCuda(const std::string& engine) { // compare cuda driver version with cuda toolkit version // cuda driver version should be greater than toolkit version to ensure compatibility if (semantic_version_utils::CompareSemanticVersion( - cuda_driver_version, suitable_toolkit_version) < 0) { + hw_inf_.cuda_driver_version, suitable_toolkit_version) < 0) { CTL_ERR("Your Cuda driver version " - << cuda_driver_version + << hw_inf_.cuda_driver_version << " is not compatible with cuda toolkit version " << suitable_toolkit_version); throw std::runtime_error("Cuda driver is not compatible with cuda toolkit"); @@ -356,8 +331,8 @@ void EngineService::DownloadCuda(const std::string& engine) { std::ostringstream cuda_toolkit_url; cuda_toolkit_url << jan_host << "/" << "dist/cuda-dependencies/" - << cuda_driver_version << "/" << system_info.os << "/" - << cuda_toolkit_file_name; + << hw_inf_.cuda_driver_version << "/" << hw_inf_.sys_inf->os + << "/" << cuda_toolkit_file_name; LOG_DEBUG << "Cuda toolkit download url: " << cuda_toolkit_url.str(); auto cuda_toolkit_local_path = @@ -387,4 +362,23 @@ void EngineService::DownloadCuda(const std::string& engine) { CTL_ERR("Error removing downloaded file: " << e.what()); } }); +} + +std::string EngineService::GetMatchedVariant( + const std::string& engine, const std::vector& variants) { + std::string matched_variant; + if (engine == "cortex.tensorrt-llm") { + matched_variant = engine_matcher_utils::ValidateTensorrtLlm( + variants, hw_inf_.sys_inf->os, hw_inf_.cuda_driver_version); + } else if (engine == "cortex.onnx") { + matched_variant = engine_matcher_utils::ValidateOnnx( + variants, hw_inf_.sys_inf->os, hw_inf_.sys_inf->arch); + } else if (engine == "cortex.llamacpp") { + auto suitable_avx = + engine_matcher_utils::GetSuitableAvxVariant(hw_inf_.cpu_inf); + matched_variant = engine_matcher_utils::Validate( + variants, hw_inf_.sys_inf->os, hw_inf_.sys_inf->arch, suitable_avx, + hw_inf_.cuda_driver_version); + } + return matched_variant; } \ No newline at end of file diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index a86ade226..1a3ab1525 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -1,9 +1,12 @@ #pragma once +#include #include #include #include #include +#include "utils/cpuid/cpu_info.h" +// #include "utils/system_info_utils.h" struct EngineInfo { std::string name; @@ -14,6 +17,9 @@ struct EngineInfo { std::string status; }; +namespace system_info_utils { +struct SystemInfo; +} class EngineService { public: constexpr static auto kIncompatible = "Incompatible"; @@ -23,6 +29,9 @@ class EngineService { const std::vector kSupportEngines = { "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"}; + EngineService(); + ~EngineService(); + std::optional GetEngineInfo(const std::string& engine) const; std::vector GetEngineInfoList() const; @@ -31,14 +40,24 @@ class EngineService { const std::string& version = "latest", const std::string& src = ""); - void UnzipEngine(const std::string& engine, - const std::string& version, - const std::string& path); + void UnzipEngine(const std::string& engine, const std::string& version, + const std::string& path); void UninstallEngine(const std::string& engine); private: void DownloadEngine(const std::string& engine, - const std::string& version = "latest"); + const std::string& version = "latest"); void DownloadCuda(const std::string& engine); + + std::string GetMatchedVariant(const std::string& engine, + const std::vector& variants); + + private: + struct HardwareInfo { + std::unique_ptr sys_inf; + cortex::cpuid::CpuInfo cpu_inf; + std::string cuda_driver_version; + }; + HardwareInfo hw_inf_; }; diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index 9cdcc8f05..faea8c003 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -51,7 +52,7 @@ inline std::string GetGpuArch(const std::string& gpuName) { } } -inline SystemInfo GetSystemInfo() { +inline std::unique_ptr GetSystemInfo() { std::ostringstream arch; std::ostringstream os; @@ -76,7 +77,7 @@ inline SystemInfo GetSystemInfo() { #else os << kUnsupported; #endif - return SystemInfo{os.str(), arch.str()}; + return std::make_unique(os.str(), arch.str()); } inline bool IsNvidiaSmiAvailable() { From 5a6d13a645d624df2a15064235bea6b2473e97c2 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Sat, 21 Sep 2024 15:07:40 +0700 Subject: [PATCH 06/11] fix: build ubuntu --- engine/utils/system_info_utils.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index faea8c003..21946621c 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -23,6 +23,8 @@ constexpr static auto kGpuInfoRegex{ R"((\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+))"}; struct SystemInfo { + SystemInfo(std::string os, std::string arch) + : os(std::move(os)), arch(std::move(arch)) {} std::string os; std::string arch; }; From 7d0db3a27a46b387756651d2d15deac6a670b91b Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 23 Sep 2024 08:37:51 +0700 Subject: [PATCH 07/11] fix: explicit constructor --- engine/utils/system_info_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index 21946621c..9dbfcc7c9 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -23,7 +23,7 @@ constexpr static auto kGpuInfoRegex{ R"((\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+))"}; struct SystemInfo { - SystemInfo(std::string os, std::string arch) + explicit SystemInfo(std::string os, std::string arch) : os(std::move(os)), arch(std::move(arch)) {} std::string os; std::string arch; From 85da6276c2a29763d2afea75396e4d9e465ed024 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 23 Sep 2024 14:16:50 +0700 Subject: [PATCH 08/11] fix: comments --- engine/services/engine_service.cc | 16 ++++++++++------ engine/services/engine_service.h | 1 - 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 2c46844ba..010ee5288 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -328,12 +328,16 @@ void EngineService::DownloadCuda(const std::string& engine) { throw std::runtime_error("Cuda driver is not compatible with cuda toolkit"); } - std::ostringstream cuda_toolkit_url; - cuda_toolkit_url << jan_host << "/" << "dist/cuda-dependencies/" - << hw_inf_.cuda_driver_version << "/" << hw_inf_.sys_inf->os - << "/" << cuda_toolkit_file_name; + auto url_obj = url_parser::Url{ + .protocol = "https", + .host = jan_host, + .pathParams = {"dist", "cuda-dependencies", hw_inf_.cuda_driver_version, + hw_inf_.sys_inf->os, cuda_toolkit_file_name}, + }; + + auto cuda_toolkit_url = url_parser::FromUrl(url_obj); - LOG_DEBUG << "Cuda toolkit download url: " << cuda_toolkit_url.str(); + LOG_DEBUG << "Cuda toolkit download url: " << cuda_toolkit_url; auto cuda_toolkit_local_path = file_manager_utils::GetContainerFolderPath( file_manager_utils::DownloadTypeToString(DownloadType::CudaToolkit)) / @@ -343,7 +347,7 @@ void EngineService::DownloadCuda(const std::string& engine) { .id = download_id, .type = DownloadType::CudaToolkit, .items = {DownloadItem{.id = download_id, - .downloadUrl = cuda_toolkit_url.str(), + .downloadUrl = cuda_toolkit_url, .localPath = cuda_toolkit_local_path}}, }}; diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 1a3ab1525..5e434bf24 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -6,7 +6,6 @@ #include #include #include "utils/cpuid/cpu_info.h" -// #include "utils/system_info_utils.h" struct EngineInfo { std::string name; From 49e869847b9082d8a1536f9574594f58b1280dc4 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 23 Sep 2024 14:18:29 +0700 Subject: [PATCH 09/11] fix: cuda version --- engine/services/engine_service.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 010ee5288..e606c98cd 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -313,7 +313,7 @@ void EngineService::DownloadCuda(const std::string& engine) { if (cuda_driver_semver.major == 11) { suitable_toolkit_version = "11.7"; } else if (cuda_driver_semver.major == 12) { - suitable_toolkit_version = "12.4"; + suitable_toolkit_version = "12.0"; } } From a349d1415606180bf4a48800edb8eefa8de8c2f8 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 23 Sep 2024 14:24:07 +0700 Subject: [PATCH 10/11] fix: jan_host --- engine/services/engine_service.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index e606c98cd..f61cebed6 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -295,7 +295,7 @@ void EngineService::DownloadCuda(const std::string& engine) { return; } // download cuda toolkit - const std::string jan_host = "https://catalog.jan.ai"; + const std::string jan_host = "catalog.jan.ai"; const std::string cuda_toolkit_file_name = "cuda.tar.gz"; const std::string download_id = "cuda"; From 43be1dec0a47b258dd19baf77ae75f4bec65ac78 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 23 Sep 2024 15:05:07 +0700 Subject: [PATCH 11/11] fix: correct cuda version --- engine/services/engine_service.cc | 48 ++++++++++++++++++------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index f61cebed6..1b1f1d278 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -12,6 +12,27 @@ using json = nlohmann::json; +namespace { +std::string GetSuitableCudaVersion(const std::string& engine, + const std::string& cuda_driver_version) { + auto suitable_toolkit_version = ""; + if (engine == "cortex.tensorrt-llm") { + // for tensorrt-llm, we need to download cuda toolkit v12.4 + suitable_toolkit_version = "12.4"; + } else { + // llamacpp + auto cuda_driver_semver = + semantic_version_utils::SplitVersion(cuda_driver_version); + if (cuda_driver_semver.major == 11) { + suitable_toolkit_version = "11.7"; + } else if (cuda_driver_semver.major == 12) { + suitable_toolkit_version = "12.0"; + } + } + return suitable_toolkit_version; +} +} // namespace + EngineService::EngineService() : hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(), .cuda_driver_version = system_info_utils::GetCudaVersion()} {} @@ -98,8 +119,10 @@ void EngineService::UnzipEngine(const std::string& engine, CTL_INF("engine: " << engine); CTL_INF("CUDA version: " << hw_inf_.cuda_driver_version); std::string cuda_variant = "cuda-"; - cuda_variant += hw_inf_.cuda_driver_version; - cuda_variant += ".tar.gz"; + cuda_variant += GetSuitableCudaVersion(engine, hw_inf_.cuda_driver_version) + + "-" + hw_inf_.sys_inf->os + "-" + hw_inf_.sys_inf->arch + + ".tar.gz"; + CTL_INF("cuda_variant: " << cuda_variant); std::vector variants; // Loop through all files in the directory @@ -299,23 +322,8 @@ void EngineService::DownloadCuda(const std::string& engine) { const std::string cuda_toolkit_file_name = "cuda.tar.gz"; const std::string download_id = "cuda"; - // TODO: we don't have API to retrieve list of cuda toolkit dependencies atm because we hosting it at jan - // will have better logic after https://github.com/janhq/cortex/issues/1046 finished - // for now, assume that we have only 11.7 and 12.4 - auto suitable_toolkit_version = ""; - if (engine == "cortex.tensorrt-llm") { - // for tensorrt-llm, we need to download cuda toolkit v12.4 - suitable_toolkit_version = "12.4"; - } else { - // llamacpp - auto cuda_driver_semver = - semantic_version_utils::SplitVersion(hw_inf_.cuda_driver_version); - if (cuda_driver_semver.major == 11) { - suitable_toolkit_version = "11.7"; - } else if (cuda_driver_semver.major == 12) { - suitable_toolkit_version = "12.0"; - } - } + auto suitable_toolkit_version = + GetSuitableCudaVersion(engine, hw_inf_.cuda_driver_version); // compare cuda driver version with cuda toolkit version // cuda driver version should be greater than toolkit version to ensure compatibility @@ -331,7 +339,7 @@ void EngineService::DownloadCuda(const std::string& engine) { auto url_obj = url_parser::Url{ .protocol = "https", .host = jan_host, - .pathParams = {"dist", "cuda-dependencies", hw_inf_.cuda_driver_version, + .pathParams = {"dist", "cuda-dependencies", suitable_toolkit_version, hw_inf_.sys_inf->os, cuda_toolkit_file_name}, };