This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
48 commits
cc2e093
feat: use llama.cpp server
sangjanai Mar 11, 2025
70caa83
Merge branch 'dev' of https://github.com/janhq/cortex.cpp into s/feat…
sangjanai Mar 17, 2025
0968abe
chore: cleanup
sangjanai Mar 17, 2025
668af84
Merge branch 'dev' of https://github.com/janhq/cortex.cpp into s/feat…
sangjanai Mar 17, 2025
219d460
feat: OAI
sangjanai Mar 18, 2025
bb5cc35
Merge branch 'dev' of https://github.com/janhq/cortex.cpp into s/feat…
sangjanai Mar 18, 2025
8c4ca06
fix: wait for child process up
sangjanai Mar 19, 2025
220a974
Merge branch 'dev' of https://github.com/janhq/cortex.cpp into s/feat…
sangjanai Mar 19, 2025
0f2fa6e
chore: cleanup
sangjanai Mar 19, 2025
6dd7f7c
chore: cleanup
sangjanai Mar 19, 2025
bc92732
chore: fix unit tests
sangjanai Mar 19, 2025
5a68356
Merge branch 's/feat/spawn-llama-cpp' of https://github.com/janhq/cor…
sangjanai Mar 19, 2025
7c4d964
chore: cleanup
sangjanai Mar 19, 2025
1e5beaf
chore: cleanup
sangjanai Mar 19, 2025
b7b772b
fix: unit tests
sangjanai Mar 19, 2025
7c66135
fix: e2e tests
sangjanai Mar 19, 2025
af28b07
Merge branch 's/feat/spawn-llama-cpp' of https://github.com/janhq/cor…
sangjanai Mar 19, 2025
4705a1b
chore: cleanup
sangjanai Mar 19, 2025
6454eca
fix: e2e tests
sangjanai Mar 19, 2025
efe5a08
fix: validation
sangjanai Mar 19, 2025
a2886d2
fix: change GH user agent
sangjanai Mar 19, 2025
cf1a2ca
chore: cleanup
sangjanai Mar 19, 2025
282aae3
chore: fix unit tests
sangjanai Mar 19, 2025
e3dfea3
fix: e2e tests
sangjanai Mar 19, 2025
7438f94
fix: validation
sangjanai Mar 19, 2025
bf16c81
fix: change GH user agent
sangjanai Mar 19, 2025
f7cb0ae
chore: windows patch
vansangpfiev Mar 20, 2025
886222d
fix: windows
vansangpfiev Mar 20, 2025
87b2aa4
chore: log
vansangpfiev Mar 20, 2025
6d0215d
Merge branch 'dev' of https://github.com/menloresearch/cortex.cpp int…
sangjanai Mar 21, 2025
f866d5f
fix: handle macos 12 variants
sangjanai Mar 21, 2025
3d0b847
Merge branch 's/feat/spawn-llama-cpp' of https://github.com/menlorese…
sangjanai Mar 21, 2025
18e958e
Merge branch 'dev' of https://github.com/menloresearch/cortex.cpp int…
sangjanai Mar 21, 2025
de0de94
chore: e2e tests
sangjanai Mar 21, 2025
b783b22
chore: major version macos
sangjanai Mar 21, 2025
7832a5a
fix: windows e2e tests
vansangpfiev Mar 21, 2025
86cc89c
Merge branch 's/feat/spawn-llama-cpp' of github.com:janhq/nitro into …
vansangpfiev Mar 21, 2025
1c1146f
fix: engine list
sangjanai Mar 21, 2025
7487304
fix: macos filter
sangjanai Mar 21, 2025
1e99861
chore: skips some tests for linux arm
sangjanai Mar 21, 2025
70adc6e
fix: terminate process windows
vansangpfiev Mar 24, 2025
9846ad8
chore: release CIs (#2171)
vansangpfiev Mar 24, 2025
aba833e
fix: remove v in the llama-engine wget
sangjanai Mar 24, 2025
b239378
fix: add start time for model
sangjanai Mar 24, 2025
ca2180c
Merge branch 'dev' of https://github.com/menloresearch/cortex.cpp int…
sangjanai Mar 27, 2025
8ce2ac6
Merge branch 'dev' into s/feat/spawn-llama-cpp
vansangpfiev Apr 2, 2025
5f36501
chore: quality gate
sangjanai Apr 2, 2025
300f368
Merge branch 'dev' into s/feat/spawn-llama-cpp
vansangpfiev Apr 3, 2025
Binary file modified .github/patches/windows/msvcp140.dll
Binary file not shown.
Binary file modified .github/patches/windows/vcruntime140.dll
Binary file not shown.
Binary file modified .github/patches/windows/vcruntime140_1.dll
Binary file not shown.
3 changes: 0 additions & 3 deletions docs/docs/engines/engine-extension.mdx
@@ -71,9 +71,6 @@ class EngineI {
std::shared_ptr<Json::Value> json_body,
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;

// Compatibility and model management
virtual bool IsSupported(const std::string& f) = 0;

virtual void GetModels(
std::shared_ptr<Json::Value> jsonBody,
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
12 changes: 6 additions & 6 deletions docs/static/openapi/cortex.json
@@ -2754,7 +2754,7 @@
},
"version": {
"type": "string",
"example": "0.1.35-28.10.24"
"example": "b4920"
}
}
}
@@ -2763,11 +2763,11 @@
{
"engine": "llama-cpp",
"name": "mac-arm64",
"version": "0.1.35-28.10.24"
"version": "b4920"
},
{
"engine": "llama-cpp",
"name": "linux-amd64-avx",
"name": "linux-avx-x64",
"version": "0.1.35-27.10.24"
}
]
@@ -2901,7 +2901,7 @@
"name": {
"type": "string",
"description": "The name of the variant, including OS, architecture, and capabilities",
"example": "linux-amd64-avx-cuda-11-7"
"example": "linux-avx-x64-cuda-11-7"
},
"created_at": {
"type": "string",
@@ -2973,7 +2973,7 @@
},
"name": {
"type": "string",
"example": "0.1.39-linux-amd64-avx-cuda-11-7"
"example": "llama-b4920-bin-linux-avx-cuda-cu11.7"
},
"size": {
"type": "integer",
@@ -3250,7 +3250,7 @@
},
"version": {
"type": "string",
"example": "0.1.35-28.10.24"
"example": "b4920"
}
}
}
10 changes: 10 additions & 0 deletions engine/CMakeLists.txt
@@ -177,6 +177,7 @@ add_executable(${TARGET_NAME} main.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/process/utils.cc

${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc
${CMAKE_CURRENT_SOURCE_DIR}/extensions/local-engine/local_engine.cc

)

@@ -222,3 +223,12 @@ set_target_properties(${TARGET_NAME} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
)

if(MSVC)
add_custom_command(
TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/../.github/patches/windows
${CMAKE_BINARY_DIR}/
)
endif()
2 changes: 1 addition & 1 deletion engine/cli/CMakeLists.txt
@@ -86,7 +86,7 @@ add_executable(${TARGET_NAME} main.cc
${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc
${CMAKE_CURRENT_SOURCE_DIR}/../services/database_service.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc

${CMAKE_CURRENT_SOURCE_DIR}/../extensions/local-engine/local_engine.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/template_renderer.cc

${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc
4 changes: 2 additions & 2 deletions engine/cli/command_line_parser.cc
@@ -33,6 +33,7 @@
#include "services/engine_service.h"
#include "utils/file_manager_utils.h"
#include "utils/logging_utils.h"
#include "utils/task_queue.h"

namespace {
constexpr const auto kCommonCommandsGroup = "Common Commands";
@@ -50,8 +51,7 @@ CommandLineParser::CommandLineParser()
download_service_{std::make_shared<DownloadService>()},
dylib_path_manager_{std::make_shared<cortex::DylibPathManager>()},
db_service_{std::make_shared<DatabaseService>()},
engine_service_{std::make_shared<EngineService>(
download_service_, dylib_path_manager_, db_service_)} {}
engine_service_{std::make_shared<EngineService>(dylib_path_manager_)} {}

bool CommandLineParser::SetupCommand(int argc, char** argv) {
app_.usage("Usage:\n" + commands::GetCortexBinary() +
4 changes: 2 additions & 2 deletions engine/cli/commands/cortex_upd_cmd.cc
@@ -532,10 +532,10 @@ bool CortexUpdCmd::GetLinuxInstallScript(const std::string& v,
const std::string& channel) {
std::vector<std::string> path_list;
if (channel == "nightly") {
path_list = {"menloresearch", "cortex.cpp", "dev", "engine",
path_list = {kMenloOrg, "cortex.cpp", "dev", "engine",
"templates", "linux", "install.sh"};
} else {
path_list = {"menloresearch", "cortex.cpp", "main", "engine",
path_list = {kMenloOrg, "cortex.cpp", "main", "engine",
"templates", "linux", "install.sh"};
}
auto url_obj = url_parser::Url{
4 changes: 2 additions & 2 deletions engine/cli/commands/cortex_upd_cmd.h
@@ -79,9 +79,9 @@ inline std::vector<std::string> GetReleasePath() {
if (CORTEX_VARIANT == file_manager_utils::kNightlyVariant) {
return {"cortex", "latest", "version.json"};
} else if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) {
return {"repos", "menloresearch", "cortex.cpp", "releases"};
return {"repos", kMenloOrg, "cortex.cpp", "releases"};
} else {
return {"repos", "menloresearch", "cortex.cpp", "releases", "latest"};
return {"repos", kMenloOrg, "cortex.cpp", "releases", "latest"};
}
}

5 changes: 4 additions & 1 deletion engine/cli/commands/engine_install_cmd.cc
@@ -92,7 +92,10 @@ bool EngineInstallCmd::Exec(const std::string& engine,
std::vector<std::string> variant_selections;
for (const auto& variant : variant_result.value()) {
auto v_name = variant["name"].asString();
if (string_utils::StringContainsIgnoreCase(v_name, hw_inf_.sys_inf->os) &&
if ((string_utils::StringContainsIgnoreCase(v_name,
hw_inf_.sys_inf->os) ||
(hw_inf_.sys_inf->os == kLinuxOs &&
string_utils::StringContainsIgnoreCase(v_name, kUbuntuOs))) &&
string_utils::StringContainsIgnoreCase(v_name,
hw_inf_.sys_inf->arch)) {
variant_selections.push_back(variant["name"].asString());
6 changes: 2 additions & 4 deletions engine/cli/commands/server_start_cmd.cc
@@ -106,10 +106,8 @@ bool ServerStartCmd::Exec(const std::string& host, int port,
#else
std::vector<std::string> commands;
// Some engines requires to add lib search path before process being created
auto download_srv = std::make_shared<DownloadService>();
auto dylib_path_mng = std::make_shared<cortex::DylibPathManager>();
auto db_srv = std::make_shared<DatabaseService>();
EngineService(download_srv, dylib_path_mng, db_srv).RegisterEngineLibPath();
EngineService(std::make_shared<cortex::DylibPathManager>())
.RegisterEngineLibPath();

std::string p = cortex_utils::GetCurrentPath() + "/" + exe;
commands.push_back(p);
2 changes: 1 addition & 1 deletion engine/cli/main.cc
@@ -155,7 +155,7 @@ int main(int argc, char* argv[]) {
auto get_latest_version = []() -> cpp::result<std::string, std::string> {
try {
auto res = github_release_utils::GetReleaseByVersion(
"menloresearch", "cortex.llamacpp", "latest");
kGgmlOrg, kLlamaRepo, "latest");
if (res.has_error()) {
CTL_ERR("Failed to get latest llama.cpp version: " << res.error());
return cpp::fail("Failed to get latest llama.cpp version: " +
4 changes: 2 additions & 2 deletions engine/cli/utils/download_progress.cc
@@ -83,8 +83,8 @@ bool DownloadProgress::Handle(
size_t max_length = 20) -> std::string {
// Check the length of the input string
if (str.length() >= max_length) {
return str.substr(
0, max_length); // Return truncated string if it's too long
return str.substr(0, max_length - 3) +
".. "; // Return truncated string if it's too long
}

// Calculate the number of spaces needed
3 changes: 3 additions & 0 deletions engine/controllers/engines.cc
@@ -155,6 +155,7 @@ void Engines::GetEngineVariants(
releases.append(json.value());
}
}
CTL_INF(releases.toStyledString());
auto resp = cortex_utils::CreateCortexHttpJsonResponse(releases);
resp->setStatusCode(k200OK);
callback(resp);
@@ -177,6 +178,8 @@ void Engines::InstallEngine(
}
norm_version = version;
}
CTL_INF("version: " << norm_version
<< ", norm_variant: " << norm_variant.value_or(""));

auto result =
engine_service_->InstallEngineAsync(engine, norm_version, norm_variant);
2 changes: 1 addition & 1 deletion engine/controllers/server.cc
@@ -138,7 +138,7 @@ void server::ProcessStreamRes(std::function<void(const HttpResponsePtr&)> cb,
auto err_or_done = std::make_shared<std::atomic_bool>(false);
auto chunked_content_provider = [this, q, err_or_done, engine_type, model_id](
char* buf,
std::size_t buf_size) -> std::size_t {
std::size_t buf_size) -> std::size_t {
if (buf == nullptr) {
LOG_TRACE << "Buf is null";
if (!(*err_or_done)) {
3 changes: 0 additions & 3 deletions engine/cortex-common/EngineI.h
@@ -47,9 +47,6 @@ class EngineI {
std::shared_ptr<Json::Value> json_body,
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;

// For backward compatible checking
virtual bool IsSupported(const std::string& f) = 0;

// Get list of running models
virtual void GetModels(
std::shared_ptr<Json::Value> jsonBody,
2 changes: 0 additions & 2 deletions engine/cortex-common/remote_enginei.h
@@ -1,7 +1,5 @@
#pragma once

#pragma once

#include <functional>
#include <memory>

12 changes: 6 additions & 6 deletions engine/e2e-test/api/engines/test_api_engine.py
@@ -28,14 +28,14 @@ def test_engines_get_llamacpp_should_be_successful(self):

# engines install
def test_engines_install_llamacpp_specific_version_and_variant(self):
data = {"version": "v0.1.40-b4354", "variant": "linux-amd64-avx"}
data = {"version": "b4932", "variant": "linux-avx-x64"}
response = requests.post(
"http://localhost:3928/v1/engines/llama-cpp/install", json=data
)
assert response.status_code == 200

def test_engines_install_llamacpp_specific_version_and_null_variant(self):
data = {"version": "v0.1.40-b4354"}
data = {"version": "b4932"}
response = requests.post(
"http://localhost:3928/v1/engines/llama-cpp/install", json=data
)
@@ -55,14 +55,14 @@ async def test_engines_install_uninstall_llamacpp_should_be_successful(self):
@pytest.mark.asyncio
async def test_engines_install_uninstall_llamacpp_with_only_version_should_be_failed(self):
# install first
data = {"variant": "mac-arm64"}
data = {"variant": "linux-avx-x64"}
install_response = requests.post(
"http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
)
await wait_for_websocket_download_success_event(timeout=120)
assert install_response.status_code == 200

data = {"version": "v0.1.35"}
data = {"version": "b4932"}
response = requests.delete(
"http://localhost:3928/v1/engines/llama-cpp/install", json=data
)
@@ -72,7 +72,7 @@ async def test_engines_install_uninstall_llamacpp_with_only_version_should_be_fa
@pytest.mark.asyncio
async def test_engines_install_uninstall_llamacpp_with_variant_should_be_successful(self):
# install first
data = {"variant": "mac-arm64"}
data = {"variant": "linux-avx-x64"}
install_response = requests.post(
"http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
)
@@ -85,7 +85,7 @@ async def test_engines_install_uninstall_llamacpp_with_variant_should_be_success
def test_engines_install_uninstall_llamacpp_with_specific_variant_and_version_should_be_successful(
self,
):
data = {"variant": "mac-arm64", "version": "v0.1.35"}
data = {"variant": "linux-avx-x64", "version": "b4932"}
# install first
install_response = requests.post(
"http://localhost:3928/v1/engines/llama-cpp/install", json=data
@@ -2,7 +2,7 @@
import requests
from utils.test_runner import start_server, stop_server, get_latest_pre_release_tag

latest_pre_release_tag = get_latest_pre_release_tag("menloresearch", "cortex.llamacpp")
latest_pre_release_tag = get_latest_pre_release_tag("menloresearch", "llama.cpp")

class TestApiEngineInstall:

@@ -23,7 +23,7 @@ def test_engines_install_llamacpp_should_be_successful(self):
assert response.status_code == 200

def test_engines_install_llamacpp_specific_version_and_variant(self):
data = {"version": latest_pre_release_tag, "variant": "linux-amd64-avx"}
data = {"version": latest_pre_release_tag, "variant": "linux-avx-x64"}
response = requests.post(
"http://localhost:3928/v1/engines/llama-cpp/install", json=data
)
4 changes: 2 additions & 2 deletions engine/e2e-test/api/engines/test_api_get_default_engine.py
@@ -24,8 +24,8 @@ def setup_and_teardown(self):
def test_api_get_default_engine_successfully(self):
# Data test
engine= "llama-cpp"
name= "linux-amd64-avx"
version= "v0.1.35-27.10.24"
name= "linux-avx-x64"
version= "b4932"

data = {"version": version, "variant": name}
post_install_url = f"http://localhost:3928/v1/engines/{engine}/install"
4 changes: 2 additions & 2 deletions engine/e2e-test/api/engines/test_api_get_list_engine.py
@@ -24,8 +24,8 @@ def setup_and_teardown(self):
def test_api_get_list_engines_successfully(self):
# Data test
engine= "llama-cpp"
name= "linux-amd64-avx"
version= "v0.1.35-27.10.24"
name= "linux-avx-x64"
version= "b4932"

post_install_url = f"http://localhost:3928/v1/engines/{engine}/install"
response = requests.delete(
4 changes: 2 additions & 2 deletions engine/e2e-test/api/engines/test_api_post_default_engine.py
@@ -23,8 +23,8 @@ def setup_and_teardown(self):
def test_api_set_default_engine_successfully(self):
# Data test
engine= "llama-cpp"
name= "linux-amd64-avx"
version= "v0.1.35-27.10.24"
name= "linux-avx-x64"
version= "b4932"

data = {"version": version, "variant": name}
post_install_url = f"http://localhost:3928/v1/engines/{engine}/install"
20 changes: 0 additions & 20 deletions engine/e2e-test/api/hardware/test_api_get_hardware.py
@@ -88,25 +88,6 @@ def test_api_get_hardware_successfully(self):
"example": True,
"description": "Indicates if the GPU is currently activated."
},
"additional_information": {
"type": "object",
"properties": {
"compute_cap": {
"type": "string",
"example": "8.6",
"description": "The compute capability of the GPU."
},
"driver_version": {
"type": "string",
"example": "535.183",
"description": "The version of the installed driver."
}
},
"required": [
"compute_cap",
"driver_version"
]
},
"free_vram": {
"type": "integer",
"example": 23983,
@@ -140,7 +121,6 @@
},
"required": [
"activated",
"additional_information",
"free_vram",
"id",
"name",
1 change: 1 addition & 0 deletions engine/e2e-test/api/model/test_api_model.py
@@ -95,6 +95,7 @@ async def test_models_start_stop_should_be_successful(self):
time.sleep(30)

print("Pull model")
requests.delete("http://localhost:3928/v1/models/tinyllama:1b")
json_body = {"model": "tinyllama:1b"}
response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
assert response.status_code == 200, f"Failed to pull model: tinyllama:1b"
18 changes: 1 addition & 17 deletions engine/e2e-test/cli/engines/test_cli_engine_install.py
@@ -31,25 +31,9 @@ def test_engines_install_llamacpp_should_be_successfully(self):
assert len(response.json()) > 0
assert exit_code == 0, f"Install engine failed with error: {error}"

@pytest.mark.skipif(reason="Ignore onnx-runtime test")
def test_engines_install_onnx_on_macos_should_be_failed(self):
exit_code, output, error = run(
"Install Engine", ["engines", "install", "onnxruntime"]
)
assert "is not supported on" in output, "Should display error message"
assert exit_code == 0, f"Install engine failed with error: {error}"

@pytest.mark.skipif(reason="Ignore tensorrt-llm test")
def test_engines_install_onnx_on_tensorrt_should_be_failed(self):
exit_code, output, error = run(
"Install Engine", ["engines", "install", "tensorrt-llm"]
)
assert "is not supported on" in output, "Should display error message"
assert exit_code == 0, f"Install engine failed with error: {error}"

@pytest.mark.skipif(platform.system() == "Windows", reason="Progress bar log issue on Windows")
def test_engines_install_pre_release_llamacpp(self):
engine_version = "v0.1.43"
engine_version = "b4932"
exit_code, output, error = run(
"Install Engine",
["engines", "install", "llama-cpp", "-v", engine_version],