@@ -1551,9 +1551,8 @@ static bool llama_kv_cache_init(

     // buf may be NULL with full offload
     if (cache.buf) {
-        // TODO: ggml_backend_buffer_memset
-        // this is only valid with CPU buffers!
-        // memset(ggml_backend_buffer_get_base(cache.buf), 0, ggml_backend_buffer_get_size(cache.buf));
+        // initialize the buffer to avoid NaNs in the padding
+        ggml_backend_buffer_clear(cache.buf, 0);
     }

     if (vram_kv_cache > 0) {
@@ -3569,8 +3568,12 @@ static void llm_load_tensors(
     {
         size_t sys_mem_required = ctx_size + buf_size;

-        LLAMA_LOG_INFO("%s: system memory used  = %7.2f MiB\n", __func__, sys_mem_required / 1024.0 / 1024.0);
-        LLAMA_LOG_INFO("%s: VRAM used           = %7.2f MiB\n", __func__, vram_weights / 1024.0 / 1024.0);
+        {
+            LLAMA_LOG_INFO("%s: system memory used  = %7.2f MiB\n", __func__, sys_mem_required / 1024.0 / 1024.0);
+        }
+        if (vram_weights > 0) {
+            LLAMA_LOG_INFO("%s: VRAM used           = %7.2f MiB\n", __func__, vram_weights / 1024.0 / 1024.0);
+        }

 #if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
         const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
@@ -3586,7 +3589,6 @@ static void llm_load_tensors(
         LLAMA_LOG_INFO("%s: offloaded %d/%d layers to GPU\n", __func__, std::min(n_gpu_layers, max_offloadable_layers), max_backend_supported_layers);
 #else
         GGML_UNUSED(n_gpu_layers);
-        GGML_UNUSED(vram_weights);
         GGML_UNUSED(tensor_split);
 #endif // defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
     }
@@ -3601,7 +3603,6 @@ static void llm_load_tensors(
     ggml_cuda_set_tensor_split(tensor_split);
 #endif // GGML_USE_CUBLAS

-    // TODO: only pass buf if it is a mmap buffer
     ml.load_all_data(ctx, progress_callback, progress_callback_user_data, buf_mmap, use_mlock ? &model.mlock_mmap : NULL);

     if (progress_callback) {
0 commit comments