@@ -2270,18 +2270,15 @@ struct llama_model_loader {
         }
     }
 
-
-
-    void load_all_data(struct ggml_context * ctx, llama_progress_callback progress_callback, void * progress_callback_user_data, ggml_backend_buffer_t buf_mmap, llama_mlock * lmlock) {
-        size_t size_lock = 0;
+    void load_all_data(struct ggml_context * ctx, llama_progress_callback progress_callback, void * progress_callback_user_data, ggml_backend_buffer_t buf_mmap, llama_mlock * lmlock) const {
         size_t size_data = 0;
 
         for (int i = 0; i < gguf_get_n_tensors(ctx_gguf); i++) {
             struct ggml_tensor * cur = ggml_get_tensor(ctx, gguf_get_tensor_name(ctx_gguf, i));
             size_data += ggml_nbytes(cur);
         }
 
-        if (use_mmap) {
+        if (use_mmap && buf_mmap) {
             if (lmlock) {
                 lmlock->init(mapping->addr);
             }
@@ -2305,6 +2302,9 @@ struct llama_model_loader {
                 if (use_mmap) {
                     if (buf_mmap) {
                         ggml_backend_tensor_alloc(buf_mmap, cur, (uint8_t *) mapping->addr + offs);
+                        if (lmlock) {
+                            lmlock->grow_to(offs + ggml_nbytes(cur));
+                        }
                     } else {
                         ggml_backend_tensor_set(cur, (uint8_t *) mapping->addr + offs, 0, ggml_nbytes(cur));
                     }
@@ -2319,11 +2319,6 @@ struct llama_model_loader {
                         ggml_backend_tensor_set(cur, read_buf.data(), 0, ggml_nbytes(cur));
                     }
                 }
-
-            if (use_mmap && lmlock) {
-                size_lock += ggml_nbytes(cur);
-                lmlock->grow_to(size_lock);
-            }
             } else {
                 // HACK: mark tensor as allocated
                 cur->data = (void *)(uintptr_t)1;
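
This diff makes `load_all_data` a `const` member, gates the mlock setup on a mmap buffer actually being passed (`use_mmap && buf_mmap`), and replaces the running `size_lock` accumulator with `lmlock->grow_to(offs + ggml_nbytes(cur))`. The old accumulator implicitly assumed tensors were contiguous in the mapping and visited in file order; growing to each tensor's end offset holds regardless of allocation order. A minimal sketch of the grow-to-offset pattern, assuming POSIX `mlock` semantics; `mlock_region` is a hypothetical stand-in for `llama_mlock`, not the real class:

```cpp
#include <sys/mman.h>
#include <cstddef>
#include <cstdint>

// Hypothetical stand-in for llama_mlock: pins a prefix of an mmap'd file
// in RAM. The locked region only ever grows, so each call locks at most
// the newly covered pages.
struct mlock_region {
    void * addr = nullptr; // base address of the mapping
    size_t size = 0;       // bytes currently locked starting at addr

    void init(void * mapping_addr) {
        addr = mapping_addr;
        size = 0;
    }

    void grow_to(size_t target) {
        if (target <= size) {
            return; // tensors may be visited out of order; never shrink
        }
        // lock only the newly covered byte range; error handling and
        // explicit page-size rounding omitted for brevity
        mlock((uint8_t *) addr + size, target - size);
        size = target;
    }
};
```

Keyed to `offs + ggml_nbytes(cur)`, each call is order-independent: whatever sequence tensors are allocated in, the locked prefix always covers every tensor seen so far.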