Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 97e5636

Browse files
authored
chore: add more checks and logs when loading a file (#1772)
1 parent a6d9da3 commit 97e5636

File tree

5 files changed

+29
-24
lines changed

5 files changed

+29
-24
lines changed

engine/controllers/models.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,8 @@ void Models::ListModel(
184184
obj["model"] = model_entry.model;
185185
obj["model"] = model_entry.model;
186186
auto es = model_service_->GetEstimation(model_entry.model);
187-
if (es.has_value()) {
188-
obj["recommendation"] = hardware::ToJson(es.value());
187+
if (es.has_value() && !!es.value()) {
188+
obj["recommendation"] = hardware::ToJson(*(es.value()));
189189
}
190190
data.append(std::move(obj));
191191
yaml_handler.Reset();

engine/services/model_service.cc

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -341,9 +341,10 @@ cpp::result<DownloadTask, std::string> ModelService::HandleDownloadUrlAsync(
341341
return download_service_->AddTask(downloadTask, on_finished);
342342
}
343343

344-
cpp::result<hardware::Estimation, std::string> ModelService::GetEstimation(
345-
const std::string& model_handle, const std::string& kv_cache, int n_batch,
346-
int n_ubatch) {
344+
cpp::result<std::optional<hardware::Estimation>, std::string>
345+
ModelService::GetEstimation(const std::string& model_handle,
346+
const std::string& kv_cache, int n_batch,
347+
int n_ubatch) {
347348
namespace fs = std::filesystem;
348349
namespace fmu = file_manager_utils;
349350
cortex::db::Models modellist_handler;
@@ -918,7 +919,7 @@ cpp::result<bool, std::string> ModelService::GetModelStatus(
918919
if (status == drogon::k200OK) {
919920
return true;
920921
} else {
921-
CTL_ERR("Model failed to get model status with status code: " << status);
922+
CTL_WRN("Model failed to get model status with status code: " << status);
922923
return cpp::fail("Model failed to get model status: " +
923924
data["message"].asString());
924925
}
@@ -1146,13 +1147,13 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
11461147
.free_vram_MiB = free_vram_MiB};
11471148
auto es = hardware::EstimateLLaMACppRun(model_path, rc);
11481149

1149-
if (es.gpu_mode.vram_MiB > free_vram_MiB && is_cuda) {
1150-
CTL_WRN("Not enough VRAM - " << "required: " << es.gpu_mode.vram_MiB
1150+
if (!!es && (*es).gpu_mode.vram_MiB > free_vram_MiB && is_cuda) {
1151+
CTL_WRN("Not enough VRAM - " << "required: " << (*es).gpu_mode.vram_MiB
11511152
<< ", available: " << free_vram_MiB);
11521153
}
11531154

1154-
if (es.cpu_mode.ram_MiB > free_ram_MiB) {
1155-
CTL_WRN("Not enough RAM - " << "required: " << es.cpu_mode.ram_MiB
1155+
if (!!es && (*es).cpu_mode.ram_MiB > free_ram_MiB) {
1156+
CTL_WRN("Not enough RAM - " << "required: " << (*es).cpu_mode.ram_MiB
11561157
<< ", available: " << free_ram_MiB);
11571158
}
11581159

engine/services/model_service.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ class ModelService {
9797

9898
bool HasModel(const std::string& id) const;
9999

100-
cpp::result<hardware::Estimation, std::string> GetEstimation(
100+
cpp::result<std::optional<hardware::Estimation>, std::string> GetEstimation(
101101
const std::string& model_handle, const std::string& kv_cache = "f16",
102102
int n_batch = 2048, int n_ubatch = 2048);
103103

engine/utils/hardware/gguf/gguf_file.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <unordered_set>
1212
#include <variant>
1313
#include <vector>
14+
#include <optional>
1415

1516
#ifdef _WIN32
1617
#include <io.h>
@@ -23,13 +24,14 @@
2324

2425
#include "ggml.h"
2526
#include "utils/string_utils.h"
27+
#include "utils/logging_utils.h"
2628

2729
// #define GGUF_LOG(msg) \
2830
// do { \
2931
// std::cout << __FILE__ << "(@" << __LINE__ << "): " << msg << '\n'; \
3032
// } while (false)
3133

32-
#define GGUF_LOG(msg)
34+
#define GGUF_LOG(msg)
3335
namespace hardware {
3436
#undef min
3537
#undef max
@@ -169,8 +171,6 @@ inline std::string to_string(const GGUFMetadataKV& kv) {
169171
return "Invalid type ";
170172
}
171173

172-
173-
174174
struct GGUFTensorInfo {
175175
/* Basic */
176176
std::string name;
@@ -208,14 +208,14 @@ struct GGUFHelper {
208208
CreateFileA(file_path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr,
209209
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
210210
if (file_handle == INVALID_HANDLE_VALUE) {
211-
std::cout << "Failed to open file" << std::endl;
211+
CTL_INF("Failed to open file: " << file_path);
212212
return false;
213213
}
214214
// Get the file size
215215
LARGE_INTEGER file_size_struct;
216216
if (!GetFileSizeEx(file_handle, &file_size_struct)) {
217217
CloseHandle(file_handle);
218-
std::cout << "Failed to open file" << std::endl;
218+
CTL_INF("Failed to get file size: " << file_path);
219219
return false;
220220
}
221221
file_size = static_cast<size_t>(file_size_struct.QuadPart);
@@ -225,7 +225,7 @@ struct GGUFHelper {
225225
CreateFileMappingA(file_handle, nullptr, PAGE_READONLY, 0, 0, nullptr);
226226
if (file_mapping == nullptr) {
227227
CloseHandle(file_handle);
228-
std::cout << "Failed to create file mapping" << std::endl;
228+
CTL_INF("Failed to create file mapping: " << file_path);
229229
return false;
230230
}
231231

@@ -235,7 +235,7 @@ struct GGUFHelper {
235235
if (data == nullptr) {
236236
CloseHandle(file_mapping);
237237
CloseHandle(file_handle);
238-
std::cout << "Failed to map file" << std::endl;
238+
CTL_INF("Failed to map file: " << file_path);
239239
return false;
240240
}
241241

@@ -479,10 +479,12 @@ struct GGUFFile {
479479
double model_bits_per_weight;
480480
};
481481

482-
inline GGUFFile ParseGgufFile(const std::string& path) {
482+
inline std::optional<GGUFFile> ParseGgufFile(const std::string& path) {
483483
GGUFFile gf;
484484
GGUFHelper h;
485-
h.OpenAndMMap(path);
485+
if(!h.OpenAndMMap(path)) {
486+
return std::nullopt;
487+
}
486488

487489
GGUFMagic magic = h.Read<GGUFMagic>();
488490
// GGUF_LOG("magic: " << magic);

engine/utils/hardware/gguf/gguf_file_estimate.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,20 +62,22 @@ inline float GetQuantBit(const std::string& kv_cache_t) {
6262
return 16.0;
6363
}
6464

65-
inline Estimation EstimateLLaMACppRun(const std::string& file_path,
66-
const RunConfig& rc) {
65+
inline std::optional<Estimation> EstimateLLaMACppRun(
66+
const std::string& file_path, const RunConfig& rc) {
6767
Estimation res;
6868
// token_embeddings_size = n_vocab * embedding_length * 2 * quant_bit/16 bytes
6969
//RAM = token_embeddings_size + ((total_ngl-ngl) >=1 ? Output_layer_size + (total_ngl - ngl - 1 ) / (total_ngl-1) * (total_file_size - token_embeddings_size - Output_layer_size) : 0 ) (bytes)
7070

7171
// VRAM = total_file_size - RAM (bytes)
7272
auto gf = ParseGgufFile(file_path);
73+
if (!gf)
74+
return std::nullopt;
7375
int32_t embedding_length = 0;
7476
int64_t n_vocab = 0;
7577
int32_t num_block = 0;
7678
int32_t total_ngl = 0;
7779
auto file_size = std::filesystem::file_size(file_path);
78-
for (auto const& kv : gf.header.metadata_kv) {
80+
for (auto const& kv : (*gf).header.metadata_kv) {
7981
if (kv.key.find("embedding_length") != std::string::npos) {
8082
embedding_length = std::any_cast<uint32_t>(kv.value);
8183
} else if (kv.key == "tokenizer.ggml.tokens") {
@@ -92,7 +94,7 @@ inline Estimation EstimateLLaMACppRun(const std::string& file_path,
9294
int32_t quant_bit_in = 0;
9395
int32_t quant_bit_out = 0;
9496

95-
for (auto const& ti : gf.tensor_infos) {
97+
for (auto const& ti : (*gf).tensor_infos) {
9698
if (ti->name == "output.weight") {
9799
quant_bit_out = GetQuantBit(ti->type);
98100
// std::cout << ti->type << std::endl;

0 commit comments

Comments
 (0)