@@ -95,6 +95,8 @@ enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
 
 // dynamic tensor allocator
 
+#define GGML_VBUFFER_MAX_CHUNKS 16
+
 struct free_block {
     size_t offset;
     size_t size;
@@ -103,8 +105,9 @@ struct free_block {
 struct ggml_dyn_tallocr {
     size_t alignment;
     int    n_free_blocks;
+    int    n_chunks;
     struct free_block free_blocks[MAX_FREE_BLOCKS];
-    size_t max_size;
+    size_t max_size[GGML_VBUFFER_MAX_CHUNKS];
     size_t max_chunk_size;
 
 #ifdef GGML_ALLOCATOR_DEBUG
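Note on the data-structure change above: `max_size` goes from a single high-water mark to one entry per chunk. Judging from how the rest of the diff uses it, `max_size[i]` holds the cumulative end offset (watermark) of chunk `i` within the allocator's flat virtual range, not that chunk's individual size; per-chunk sizes are recovered by subtracting neighbouring entries, which is what the new `ggml_vbuffer_alloc` does further down. A minimal standalone sketch with made-up watermark values:

```c
#include <stdio.h>

int main(void) {
    // hypothetical watermarks after three chunks: max_size[i] is the
    // cumulative end offset of chunk i, not its individual size
    size_t max_size[] = {3000, 13000, 13200};
    int    n_chunks   = 3;

    for (int i = 0; i < n_chunks; i++) {
        size_t chunk_size = max_size[i] - (i > 0 ? max_size[i - 1] : 0);
        printf("chunk %d: ends at %zu, own size %zu\n", i, max_size[i], chunk_size);
    }
    // prints sizes 3000, 10000, 200: the values that ggml_vbuffer_alloc
    // (later in this diff) would pass to ggml_backend_buft_alloc_buffer
    return 0;
}
```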
@@ -117,10 +120,21 @@ struct ggml_dyn_tallocr {
 
 // the memory range [0, max_size) is divided into n chunks of size max_chunk_size (with the last chunk possibly being smaller).
 // tensor allocations may not cross chunk boundaries.
-static void ggml_dyn_tallocr_new_chunk(struct ggml_dyn_tallocr * alloc, struct free_block * block) {
-    size_t n_chunks = (alloc->max_size + alloc->max_chunk_size - 1) / alloc->max_chunk_size;
-    block->offset = n_chunks * alloc->max_chunk_size;
-    block->size = alloc->max_chunk_size;
+static size_t ggml_dyn_tallocr_chunk_index(struct ggml_dyn_tallocr * alloc, size_t offset) {
+    for (int i = 0; i < alloc->n_chunks; i++) {
+        if (offset < alloc->max_size[i]) {
+            return i;
+        }
+    }
+    return alloc->n_chunks - 1;
+}
+
+static void ggml_dyn_tallocr_new_chunk(struct ggml_dyn_tallocr * alloc, struct free_block * block, size_t min_size) {
+    GGML_ASSERT(alloc->n_chunks >= 1);
+    block->offset = alloc->max_size[alloc->n_chunks - 1];
+    block->size   = MAX(min_size, alloc->max_chunk_size);
+    alloc->n_chunks++;
+    GGML_ASSERT(alloc->n_chunks <= GGML_VBUFFER_MAX_CHUNKS);
 }
 
 #ifdef GGML_ALLOCATOR_DEBUG
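The two helpers above replace the old fixed-stride arithmetic: chunk boundaries now sit at per-chunk watermarks rather than multiples of `max_chunk_size`, and `min_size` lets a single chunk grow past `max_chunk_size` when one tensor needs it (which is why the `GGML_ABORT` removed in a later hunk becomes unnecessary). Here is a standalone harness over a pared-down mock (`mock_tallocr` and its helpers are hypothetical, not the real ggml types) that mirrors the two functions and shows an oversized request simply producing an oversized chunk:

```c
#include <assert.h>
#include <stdio.h>

#define MAX_CHUNKS 16
#define MAX(a, b)  ((a) > (b) ? (a) : (b))

struct mock_tallocr {
    int    n_chunks;
    size_t max_size[MAX_CHUNKS]; // cumulative end offset of each chunk
    size_t max_chunk_size;
};

// mirrors ggml_dyn_tallocr_chunk_index: scan the watermarks
static size_t chunk_index(const struct mock_tallocr * a, size_t offset) {
    for (int i = 0; i < a->n_chunks; i++) {
        if (offset < a->max_size[i]) {
            return i;
        }
    }
    return a->n_chunks - 1; // offsets past the last watermark belong to the still-growing last chunk
}

// mirrors ggml_dyn_tallocr_new_chunk: the next chunk starts at the
// current watermark and may exceed max_chunk_size if min_size demands it
static void new_chunk(struct mock_tallocr * a, size_t min_size, size_t * offset, size_t * size) {
    assert(a->n_chunks >= 1);
    *offset = a->max_size[a->n_chunks - 1];
    *size   = MAX(min_size, a->max_chunk_size);
    a->n_chunks++;
    assert(a->n_chunks <= MAX_CHUNKS);
}

int main(void) {
    struct mock_tallocr a = { .n_chunks = 1, .max_size = {3000}, .max_chunk_size = 4096 };

    size_t off, sz;
    new_chunk(&a, 10000, &off, &sz);       // a tensor bigger than max_chunk_size
    a.max_size[a.n_chunks - 1] = off + sz; // record the new watermark, as the alloc path does

    printf("chunk 1: offset %zu, size %zu\n", off, sz);          // 3000, 10000
    printf("offset 5000 -> chunk %zu\n", chunk_index(&a, 5000)); // 1
    return 0;
}
```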
@@ -149,10 +163,6 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t size, const struct ggml_tensor * tensor) {
     size = aligned_offset(NULL, size, alloc->alignment);
 
     AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size);
-    if (size > alloc->max_chunk_size) {
-        GGML_ABORT("allocation failed: tensor %s (%zu bytes) exceeds maximum backend buffer size (%zu bytes)\n",
-                tensor->name, size, alloc->max_chunk_size);
-    }
 
     size_t max_avail = 0;
@@ -172,14 +182,10 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t size, const struct ggml_tensor * tensor) {
         // the last block represents memory still available in an existing chunk
         struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
         max_avail = MAX(max_avail, block->size);
-        if (block->size >= size) {
-            best_fit_block = alloc->n_free_blocks - 1;
-        } else {
-            // not enough space in existing chunk, create a new one at the end
-            best_fit_block = alloc->n_free_blocks;
-            alloc->n_free_blocks += 1;
-            GGML_ASSERT(alloc->n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks");
-            ggml_dyn_tallocr_new_chunk(alloc, &alloc->free_blocks[alloc->n_free_blocks - 1]);
+        best_fit_block = alloc->n_free_blocks - 1;
+        if (block->size < size) {
+            // not enough space in existing chunk, start the next one
+            ggml_dyn_tallocr_new_chunk(alloc, &alloc->free_blocks[best_fit_block], size);
         }
     }
 
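A subtlety in this branch: when the tail block is too small, it is overwritten in place by the new chunk's block instead of being kept alongside a newly appended entry as the old code did, so the leftover tail can never serve a later small allocation. That appears to waste nothing in practice, though: `ggml_dyn_tallocr_new_chunk` starts the next chunk at the previous chunk's watermark (`max_size[n_chunks - 1]`), and the discarded tail, being unallocated, never raised that watermark, so it is never backed by real buffer memory. For example, with `max_chunk_size = 4096`, a watermark of 3000 and a 1096-byte tail, a 2000-byte request starts the next chunk at offset 3000, where the old fixed-boundary scheme would have started it at 4096.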
@@ -196,7 +202,7 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t size, const struct ggml_tensor * tensor) {
         // if there are no remaining blocks all memory in current chunk was used up -> start the next one
         if (alloc->n_free_blocks == 0) {
             alloc->n_free_blocks = 1;
-            ggml_dyn_tallocr_new_chunk(alloc, &alloc->free_blocks[0]);
+            ggml_dyn_tallocr_new_chunk(alloc, &alloc->free_blocks[0], 0);
         }
     }
@@ -232,7 +238,7 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t size, const struct ggml_tensor * tensor) {
     }
 #endif
 
-    alloc->max_size = MAX(alloc->max_size, offset + size);
+    alloc->max_size[alloc->n_chunks - 1] = MAX(alloc->max_size[alloc->n_chunks - 1], offset + size);
 
     return offset;
 
@@ -248,13 +254,13 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, size_t size, const struct ggml_tensor * tensor) {
 #ifdef GGML_ALLOCATOR_DEBUG
     remove_allocated_tensor(alloc, offset, tensor);
 #endif
-    size_t chunk = offset / alloc->max_chunk_size;
+    size_t chunk = ggml_dyn_tallocr_chunk_index(alloc, offset);
 
     // see if we can merge with an existing block
    for (int i = 0; i < alloc->n_free_blocks; i++) {
         struct free_block * block = &alloc->free_blocks[i];
         // can only merge with blocks within the same chunk
-        size_t block_chunk = block->offset / alloc->max_chunk_size;
+        size_t block_chunk = ggml_dyn_tallocr_chunk_index(alloc, block->offset);
         if (chunk != block_chunk) {
             continue;
         }
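Two free blocks can be adjacent in the flat offset range while sitting in different backend buffers, so the merge logic must compare chunk indices rather than trusting offset adjacency alone, and it has to go through `ggml_dyn_tallocr_chunk_index` because boundaries now lie at per-chunk watermarks instead of multiples of `max_chunk_size`. A small self-contained sketch of that gate (watermark values hypothetical):

```c
#include <stdio.h>

// chunk_of mirrors ggml_dyn_tallocr_chunk_index over a watermark array
static size_t chunk_of(const size_t * max_size, int n_chunks, size_t offset) {
    for (int i = 0; i < n_chunks; i++) {
        if (offset < max_size[i]) {
            return i;
        }
    }
    return n_chunks - 1;
}

int main(void) {
    size_t max_size[] = {3000, 7096}; // chunk 0 ends at 3000, chunk 1 at 7096
    int    n_chunks   = 2;

    size_t a_off = 2500, a_size = 500; // free block [2500, 3000) -> chunk 0
    size_t b_off = 3000;               // free block starting at 3000 -> chunk 1

    int adjacent   = (a_off + a_size == b_off);
    int same_chunk = (chunk_of(max_size, n_chunks, a_off) ==
                      chunk_of(max_size, n_chunks, b_off));

    // adjacent = 1 but same_chunk = 0: the blocks touch in the flat
    // range yet live in different backend buffers, so no merge
    printf("adjacent: %d, same chunk: %d, merge: %d\n",
           adjacent, same_chunk, adjacent && same_chunk);
    return 0;
}
```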
@@ -264,7 +270,7 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, size_t size, const struct ggml_tensor * tensor) {
         // check if we can merge with the next block (within the same chunk)
         if (i < alloc->n_free_blocks - 1) {
             struct free_block * next = &alloc->free_blocks[i + 1];
-            if (block->offset + block->size == next->offset && block_chunk == (next->offset / alloc->max_chunk_size)) {
+            if (block->offset + block->size == next->offset && block_chunk == ggml_dyn_tallocr_chunk_index(alloc, next->offset)) {
                 block->size += next->size;
                 alloc->n_free_blocks--;
                 for (int j = i + 1; j < alloc->n_free_blocks; j++) {
@@ -281,7 +287,7 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, size_t size, const struct ggml_tensor * tensor) {
         // check if we can merge with the previous block (within the same chunk)
         if (i > 0) {
             struct free_block * prev = &alloc->free_blocks[i - 1];
-            if (prev->offset + prev->size == block->offset && block_chunk == (prev->offset / alloc->max_chunk_size)) {
+            if (prev->offset + prev->size == block->offset && block_chunk == ggml_dyn_tallocr_chunk_index(alloc, prev->offset)) {
                 prev->size += block->size;
                 alloc->n_free_blocks--;
                 for (int j = i; j < alloc->n_free_blocks; j++) {
@@ -313,9 +319,10 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, size_t size, const struct ggml_tensor * tensor) {
 
 static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) {
     alloc->n_free_blocks = 1;
+    alloc->n_chunks = 1;
     alloc->free_blocks[0].offset = 0;
     alloc->free_blocks[0].size = alloc->max_chunk_size;
-    alloc->max_size = 0;
+    memset(alloc->max_size, 0, sizeof(alloc->max_size));
 
     if (alloc->free_blocks[0].size == SIZE_MAX) {
         alloc->free_blocks[0].size = SIZE_MAX/2; // avoid overflows
@@ -334,8 +341,9 @@ static struct ggml_dyn_tallocr * ggml_dyn_tallocr_new(size_t alignment, size_t max_buffer_size) {
     *alloc = (struct ggml_dyn_tallocr) {
         /*.alignment      = */ alignment,
         /*.n_free_blocks  = */ 0,
+        /*.n_chunks       = */ 0,
         /*.free_blocks    = */ {{0}},
-        /*.max_size       = */ 0,
+        /*.max_size       = */ {0},
         /*.max_chunk_size = */ max_buffer_size,
 #ifdef GGML_ALLOCATOR_DEBUG
         /*.allocated_tensors = */ {{0}},
@@ -352,14 +360,12 @@ static void ggml_dyn_tallocr_free(struct ggml_dyn_tallocr * alloc) {
 }
 
 static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc) {
-    return alloc->max_size;
+    return alloc->max_size[alloc->n_chunks - 1];
 }
 
 
 // virtual buffer with contiguous memory range, split into multiple backend buffers (chunks)
 
-#define GGML_VBUFFER_MAX_CHUNKS 8
-
 struct vbuffer {
     ggml_backend_buffer_type_t buft;
     ggml_backend_buffer_t chunks[GGML_VBUFFER_MAX_CHUNKS];
@@ -401,36 +407,32 @@ static size_t ggml_vbuffer_size(struct vbuffer * buf) {
     return size;
 }
 
-static int ggml_vbuffer_alloc(struct vbuffer * buf, size_t size, enum ggml_backend_buffer_usage usage) {
-    size_t max_chunk_size = ggml_backend_buft_get_max_size(buf->buft);
-    if (size > GGML_VBUFFER_MAX_CHUNKS * max_chunk_size) {
-        return 0;
-    }
-
-    int n = 0;
-    // always allocate at least 1 chunk even if requested size is 0
-    while (size > 0 || n == 0) {
-        GGML_ASSERT(n < GGML_VBUFFER_MAX_CHUNKS);
-        size_t chunk_size = MIN(size, max_chunk_size);
+static bool ggml_vbuffer_alloc(struct vbuffer * buf, const struct ggml_dyn_tallocr * talloc, enum ggml_backend_buffer_usage usage) {
+    for (int n = 0; n < talloc->n_chunks; n++) {
+        size_t chunk_size = talloc->max_size[n];
+        if (n > 0) {
+            chunk_size -= talloc->max_size[n - 1];
+        }
         buf->chunks[n] = ggml_backend_buft_alloc_buffer(buf->buft, chunk_size);
         if (buf->chunks[n] == NULL) {
             ggml_vbuffer_free_chunks(buf);
-            return 0;
+            return false;
         }
         ggml_backend_buffer_set_usage(buf->chunks[n], usage);
-
-        GGML_ASSERT(size >= chunk_size);
-        size -= chunk_size;
-        n += 1;
     }
-    return n;
+    return true;
 }
 
 static void ggml_vbuffer_tensor_alloc(struct vbuffer * buf, struct ggml_tensor * tensor, size_t offset) {
-    size_t max_chunk_size = ggml_backend_buft_get_max_size(buf->buft);
-    size_t chunk_index = offset / max_chunk_size;
-    size_t chunk_offset = offset % max_chunk_size;
-    GGML_ASSERT(chunk_index < GGML_VBUFFER_MAX_CHUNKS && buf->chunks[chunk_index] != NULL);
+    size_t chunk_index = 0, chunk_offset = offset;
+    while (chunk_index < GGML_VBUFFER_MAX_CHUNKS && buf->chunks[chunk_index]) {
+        size_t chunk_size = ggml_backend_buffer_get_size(buf->chunks[chunk_index]);
+        if (chunk_offset < chunk_size) {
+            break;
+        }
+        chunk_offset -= chunk_size;
+        chunk_index++;
+    }
 
     void * base = ggml_backend_buffer_get_base(buf->chunks[chunk_index]);
     void * addr = (char *)base + chunk_offset;
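Because each backend buffer is now sized to its chunk's watermark difference, translating a flat allocator offset into a (chunk, offset-within-chunk) pair has to walk the actual buffer sizes; the old `offset / max_chunk_size` and `offset % max_chunk_size` arithmetic would misplace tensors as soon as chunk sizes differ. A sketch of the same walk over plain arrays standing in for backend buffers (sizes hypothetical, matching the earlier watermark example):

```c
#include <stdio.h>

int main(void) {
    // per-chunk backing sizes, as ggml_backend_buffer_get_size would
    // report them (watermark differences: 3000, 13000, 13200)
    size_t chunk_sizes[] = {3000, 10000, 200};
    int    n_chunks      = 3;

    size_t offset = 5000; // flat offset handed out by the dyn allocator
    size_t chunk_index = 0, chunk_offset = offset;
    while ((int)chunk_index < n_chunks && chunk_offset >= chunk_sizes[chunk_index]) {
        chunk_offset -= chunk_sizes[chunk_index];
        chunk_index++;
    }

    // prints: offset 5000 -> chunk 1 at offset 2000
    printf("offset %zu -> chunk %zu at offset %zu\n", offset, chunk_index, chunk_offset);
    return 0;
}
```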
@@ -880,7 +882,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
 #endif
 
             ggml_vbuffer_free_chunks(galloc->buffers[i]);
-            if (!ggml_vbuffer_alloc(galloc->buffers[i], new_size, GGML_BACKEND_BUFFER_USAGE_COMPUTE)) {
+            if (!ggml_vbuffer_alloc(galloc->buffers[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE)) {
                 GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
                 return false;
             }