@@ -1098,11 +1098,11 @@ struct llama_model_loader {
         this->use_mmap = use_mmap;
     }
 
-    void calc_sizes(size_t * ctx_size_p, size_t * mmapped_size_p) const {
-        *ctx_size_p = *mmapped_size_p = 0;
+    void calc_sizes(size_t & ctx_size_p, size_t & mmapped_size_p) const {
+        ctx_size_p = mmapped_size_p = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
-            *ctx_size_p += sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE;
-            *(use_mmap ? mmapped_size_p : ctx_size_p) += lt.size + 16;
+            ctx_size_p += sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE;
+            (use_mmap ? mmapped_size_p : ctx_size_p) += ggml_nbytes_pad(lt.ggml_tensor);
         }
     }
 
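The hunk above turns calc_sizes() from pointer out-parameters into reference out-parameters, and replaces the hand-rolled per-tensor size estimate (lt.size + 16) with ggml_nbytes_pad(), which reports the tensor's byte size rounded up to ggml's memory alignment. A minimal standalone sketch of the reference-out-parameter pattern (calc_demo() is a hypothetical stand-in, not llama.cpp code):

    #include <cstddef>
    #include <cstdio>

    // Hypothetical stand-in for calc_sizes(): results are written through
    // references, so the body assigns and accumulates without dereferencing.
    static void calc_demo(size_t & ctx_size, size_t & mmapped_size) {
        ctx_size = mmapped_size = 0;   // was: *ctx_size_p = *mmapped_size_p = 0;
        ctx_size     += 128;
        mmapped_size += 4096;
    }

    int main() {
        size_t ctx_size;
        size_t mmapped_size;
        calc_demo(ctx_size, mmapped_size);   // was: calc_demo(&ctx_size, &mmapped_size);
        std::printf("ctx = %zu bytes, mmapped = %zu bytes\n", ctx_size, mmapped_size);
        return 0;
    }

Note that the conditional in the new loop still compiles: both arms of (use_mmap ? mmapped_size_p : ctx_size_p) are lvalues of type size_t &, so the ternary expression itself is an lvalue that += can target.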
@@ -1159,19 +1159,19 @@ struct llama_model_loader {
     }
 
     void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
-        size_t data_size = 0;
-        size_t prefetch_size = 0;
-        size_t lock_size = 0;
+        size_t data_size = 0;
+        size_t lock_size = 0;
+        size_t pref_size = 0; // prefetch
 
         for (const llama_load_tensor & lt : tensors_map.tensors) {
             data_size += lt.size;
             if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
-                prefetch_size += lt.size;
+                pref_size += lt.size;
             }
         }
 
         if (use_mmap) {
-            mapping.reset(new llama_mmap(&file_loader->file, prefetch_size, ggml_is_numa()));
+            mapping.reset(new llama_mmap(&file_loader->file, pref_size, ggml_is_numa()));
             if (lmlock) {
                 lmlock->init(mapping->addr);
             }
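Here pref_size accumulates only the CPU-backed tensors, so the prefetch hint passed to llama_mmap covers just the bytes the CPU will actually read and skips GPU-offloaded weights. As a rough sketch of how such a prefetch hint is typically issued on Linux-like systems (an assumption about llama_mmap's internals, which also handle NUMA and non-POSIX platforms):

    #include <sys/mman.h>
    #include <algorithm>
    #include <cstddef>

    // Sketch: ask the kernel to read ahead the first `prefetch` bytes of a mapping.
    // Assumes addr/file_size describe a successful mmap() of the model file.
    static void hint_prefetch(void * addr, size_t file_size, size_t prefetch) {
        if (prefetch > 0) {
            // clamp to the mapping size; MADV_WILLNEED starts asynchronous readahead
            madvise(addr, std::min(file_size, prefetch), MADV_WILLNEED);
        }
    }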
@@ -1404,7 +1404,7 @@ static void llama_model_load_internal(
 
     size_t ctx_size;
     size_t mmapped_size;
-    ml->calc_sizes(&ctx_size, &mmapped_size);
+    ml->calc_sizes(ctx_size, mmapped_size);
     LLAMA_LOG_INFO("%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/1024.0/1024.0);
 
     // create the ggml context
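Both call sites (this one and the LoRA loader below) change in lockstep: the address-of operators drop out, and the reference signature statically rules out passing a null pointer, which the old pointer form would have accepted. The updated call shape, taken straight from the hunk above (the log line divides by 1024.0 twice to report bytes as MB):

    size_t ctx_size;
    size_t mmapped_size;
    // before: ml->calc_sizes(&ctx_size, &mmapped_size);
    ml->calc_sizes(ctx_size, mmapped_size);   // arguments must now be real lvalues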
@@ -3688,7 +3688,7 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
 
     size_t ctx_size;
     size_t mmapped_size;
-    model_loader->calc_sizes(&ctx_size, &mmapped_size);
+    model_loader->calc_sizes(ctx_size, mmapped_size);
     base_buf.resize(ctx_size);
 
     ggml_init_params base_params;