@@ -5085,10 +5085,10 @@ static struct ggml_cgraph * llama_build_graph(
50855085 { OFFLOAD_FUNC_NOP, " CPU" },
50865086 { OFFLOAD_FUNC_OUT, " CPU" },
50875087#ifdef GGML_USE_CUBLAS
5088- { OFFLOAD_FUNC, " GPU (CUDA)" },
5089- { OFFLOAD_FUNC_KQ, " GPU (CUDA) KQ" },
5090- { OFFLOAD_FUNC_V, " GPU (CUDA) V" },
5091- { OFFLOAD_FUNC_NR, " GPU (CUDA) NR" },
5088+ { OFFLOAD_FUNC, " GPU (CUDA)" },
5089+ { OFFLOAD_FUNC_KQ, " GPU (CUDA) KQ" },
5090+ { OFFLOAD_FUNC_V, " GPU (CUDA) V" },
5091+ { OFFLOAD_FUNC_NR, " GPU (CUDA) NR" },
50925092 { OFFLOAD_FUNC_EMB, " GPU (CUDA) EMB" },
50935093#else
50945094 { OFFLOAD_FUNC, " CPU" },
@@ -5103,11 +5103,11 @@ static struct ggml_cgraph * llama_build_graph(
51035103 llm_offload_func_e func_e = k_offload_func_trie.find (name);
51045104
51055105 if (func_e == OFFLOAD_FUNC_NOP) {
5106- // if a tensor hasn't been offloaded, we warn the user
5107- if (worst_case) {
5108- LLAMA_LOG_WARN (" %s: %32s: not offloaded (ref: %s)\n " , __func__,
5109- cur->name , " https://github.com/ggerganov/llama.cpp/pull/3837" );
5110- }
5106+ // // if a tensor hasn't been offloaded, we warn the user
5107+ // if (worst_case) {
5108+ // LLAMA_LOG_WARN("%s: %32s: not offloaded (ref: %s)\n", __func__,
5109+ // cur->name, "https://github.com/ggerganov/llama.cpp/pull/3837");
5110+ // }
51115111
51125112 return ;
51135113 }
@@ -5214,6 +5214,30 @@ static struct ggml_cgraph * llama_build_graph(
52145214 GGML_ASSERT (false );
52155215 }
52165216
5217+ #ifdef GGML_USE_CUBLAS
5218+ // TODO: tmp code to help find tensors that haven't been offloaded
5219+ if (worst_case) {
5220+ for (int i = 0 ; i < result->n_nodes ; ++i) {
5221+ struct ggml_tensor * cur = result->nodes [i];
5222+
5223+ if (cur->view_src != nullptr ) {
5224+ continue ;
5225+ }
5226+
5227+ // check the global map for what offload function to use for this tensor
5228+ llm_offload_func_e func_e = k_offload_func_trie.find (cur->name );
5229+
5230+ if (func_e == OFFLOAD_FUNC_NOP && cur->backend == GGML_BACKEND_CPU) {
5231+ // if a tensor hasn't been offloaded, we warn the user
5232+ if (worst_case) {
5233+ LLAMA_LOG_WARN (" %s: %32s: not offloaded (ref: %s)\n " , __func__,
5234+ cur->name , " https://github.com/ggerganov/llama.cpp/pull/3837" );
5235+ }
5236+ }
5237+ }
5238+ }
5239+ #endif
5240+
52175241 return result;
52185242}
52195243
0 commit comments