@@ -20,7 +20,6 @@ constexpr static auto kTensorrtLlmEngine = "cortex.tensorrt-llm";
2020} // namespace
2121
2222server::server () {
23-
2423// Some default values for now below
2524// log_disable(); // Disable the log to file feature, reduce bloat for
2625// target
@@ -297,7 +296,7 @@ void server::LoadModel(const HttpRequestPtr& req,
297296 get_engine_path (engine_type);
298297#if defined(WIN32)
299298 auto ws = std::wstring (abs_path.begin (), abs_path.end ());
300- if (AddDllDirectory (ws.c_str ()) == 0 ) {
299+ if (AddDllDirectory (ws.c_str ()) == 0 ) {
301300 LOG_WARN << " Could not add dll directory: " << abs_path;
302301 }
303302#endif
@@ -335,6 +334,36 @@ void server::LoadModel(const HttpRequestPtr& req,
335334 LOG_TRACE << " Done load model" ;
336335}
337336
337+ void server::UnloadEngine (
338+ const HttpRequestPtr& req,
339+ std::function<void (const HttpResponsePtr&)>&& callback) {
340+ if (!HasFieldInReq (req, callback, " engine" )) {
341+ return ;
342+ }
343+
344+ auto engine_type =
345+ (*(req->getJsonObject ())).get (" engine" , cur_engine_type_).asString ();
346+ if (!IsEngineLoaded (engine_type)) {
347+ Json::Value res;
348+ res[" message" ] = " Engine is not loaded yet" ;
349+ auto resp = cortex_utils::CreateCortexHttpJsonResponse (res);
350+ resp->setStatusCode (k409Conflict);
351+ callback (resp);
352+ LOG_WARN << " Engine is not loaded yet" ;
353+ return ;
354+ }
355+
356+ EngineI* e = std::get<EngineI*>(engines_[engine_type].engine );
357+ delete e;
358+ engines_.erase (engine_type);
359+ LOG_INFO << " Unloaded engine " + engine_type;
360+ Json::Value res;
361+ res[" message" ] = " Unloaded engine " + engine_type;
362+ auto resp = cortex_utils::CreateCortexHttpJsonResponse (res);
363+ resp->setStatusCode (k200OK);
364+ callback (resp);
365+ }
366+
338367void server::ProcessStreamRes (std::function<void (const HttpResponsePtr&)> cb,
339368 std::shared_ptr<SyncQueue> q) {
340369 auto err_or_done = std::make_shared<std::atomic_bool>(false );
0 commit comments