1919// max number of MTLCommandBuffer used to submit a graph for processing
2020#define GGML_METAL_MAX_COMMAND_BUFFERS 8
2121
22- // max number of buffers that can be allocated on the heap per command buffer
23- #define GGML_METAL_MAX_HEAP_BUFFERS 64
24-
2522#ifndef TARGET_OS_VISION
2623#define TARGET_OS_VISION 0
2724#endif
@@ -472,14 +469,15 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
472469};
473470
// Scratch heap from which temporary MTLBuffers are placed while encoding graph nodes.
struct ggml_metal_heap {
    // non-zero once an allocation could not be served:
    //   1 - the request does not fit within the current heap size
    //   3 - the heap failed to create the buffer
    int fail;

    // current placement offset inside the heap; rewound to 0 for every encoded node
    size_t offs;
    // largest extent (offset + aligned size) requested by any allocation since the last reset
    size_t need;

    // device used to (re)create the heap object
    id<MTLDevice> device;
    // the underlying Metal heap
    id<MTLHeap> obj;

    // buffers handed out since the last reset; released by ggml_metal_heap_reset()
    NSMutableArray * bufs;
};
484482
485483static struct ggml_metal_heap * ggml_metal_heap_init (id <MTLDevice > device, size_t size) {
@@ -488,7 +486,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
488486 MTLHeapDescriptor * desc = [[MTLHeapDescriptor alloc ] init ];
489487 desc.storageMode = MTLStorageModePrivate ;
490488 desc.cpuCacheMode = MTLCPUCacheModeDefaultCache ;
491- desc.type = MTLHeapTypeAutomatic ; // TODO: use MTLHeapTypePlacement
489+ desc.type = MTLHeapTypePlacement ;
492490 desc.size = size;
493491
494492 heap->device = device;
@@ -501,39 +499,35 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
501499 return false ;
502500 }
503501
504- for (int i = 0 ; i < GGML_METAL_MAX_HEAP_BUFFERS; ++i) {
505- heap->bufs [i] = nil ;
506- }
507-
508502 [desc release ];
509503
504+ heap->bufs = [[NSMutableArray alloc ] init ];
505+
510506 return heap;
511507}
512508
// Drop every buffer handed out so far and clear the allocation bookkeeping.
// The MTLHeap object itself is left untouched.
static void ggml_metal_heap_reset(struct ggml_metal_heap * heap) {
    // balance the reference each buffer got when it was created; the reference
    // held by the array is dropped by removeAllObjects below
    const NSUInteger n_bufs = [heap->bufs count];
    for (NSUInteger i = 0; i < n_bufs; ++i) {
        id<MTLBuffer> cur = [heap->bufs objectAtIndex:i];
        [cur release];
    }
    [heap->bufs removeAllObjects];

    heap->need = 0;
    heap->offs = 0;
    heap->fail = 0;
}
519+
// Tear down a heap: release its outstanding buffers, the buffer array, the
// MTLHeap object and finally the struct itself. A nil heap is ignored.
static void ggml_metal_heap_free(struct ggml_metal_heap * heap) {
    if (!heap) {
        return;
    }

    // releases the buffers still tracked in heap->bufs and empties the array
    ggml_metal_heap_reset(heap);

    [heap->bufs release];
    [heap->obj release];

    free(heap);
}
538532
539533static bool ggml_metal_heap_resize (struct ggml_metal_heap * heap, size_t size) {
@@ -546,7 +540,7 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
546540 MTLHeapDescriptor * desc = [[MTLHeapDescriptor alloc ] init ];
547541 desc.storageMode = MTLStorageModePrivate ;
548542 desc.cpuCacheMode = MTLCPUCacheModeDefaultCache ;
549- desc.type = MTLHeapTypeAutomatic ; // TODO: use MTLHeapTypePlacement
543+ desc.type = MTLHeapTypePlacement ;
550544 desc.size = size;
551545
552546 heap->obj = [heap->device newHeapWithDescriptor: desc];
@@ -571,33 +565,32 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
571565
572566 const size_t size_aligned = GGML_PAD (size, alignment);
573567
574- // GGML_LOG_INFO("%s: size = %zu, size_aligned = %zu, need = %zu, fail = %d\n", __func__, size, size_aligned, heap->need, heap->fail);
568+ heap->offs += size_aligned;
569+ heap->need = MAX (heap->need , heap->offs + size_aligned);
575570
576- heap-> need += size_aligned;
571+ // GGML_LOG_INFO("%s: size = %zu, size_aligned = %zu, offs = %zu, need = %zu\n", __func__, size, size_aligned, heap->offs, heap->need);
577572
578573 if (no_alloc) {
579574 return nil ;
580575 }
581576
582- if (!heap->fail && size_aligned > [heap->obj maxAvailableSizeWithAlignment: alignment ]) {
577+ if (!heap->fail && heap-> offs + size_aligned > [heap->obj size ]) {
583578 heap->fail = 1 ;
584579 }
585580
586- if (!heap->fail && heap->n >= GGML_METAL_MAX_HEAP_BUFFERS) {
587- heap->fail = 2 ;
588- }
589-
590581 if (heap->fail ) {
591582 return nil ;
592583 }
593584
594- id <MTLBuffer > buf = [heap->obj newBufferWithLength: size_aligned options: MTLResourceStorageModePrivate ];
585+ id <MTLBuffer > buf = [heap->obj newBufferWithLength: size_aligned options: MTLResourceStorageModePrivate offset: heap->offs ];
595586 if (!buf) {
596587 heap->fail = 3 ;
597588 return nil ;
598589 }
599590
600- heap->bufs [heap->n++] = buf;
591+ [heap->bufs addObject: buf];
592+
593+ // GGML_LOG_INFO("%s: allocated buffer, size = %zu, offs = %zu, heap size = %zu, heap used = %zu\n", __func__, size_aligned, heap->offs, [heap->obj size], [heap->obj usedSize]);
601594
602595 return buf;
603596}
@@ -634,7 +627,6 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
634627 void (^encode_async)(size_t ith);
635628
636629 // n_cb command buffers + 1 used by the main thread
637- // id<MTLCommandBuffer> command_buffers[GGML_METAL_MAX_COMMAND_BUFFERS + 1];
638630 struct ggml_metal_command_buffer cmd_bufs[GGML_METAL_MAX_COMMAND_BUFFERS + 1 ];
639631
640632 // abort ggml_metal_graph_compute if callback returns true
@@ -1638,13 +1630,16 @@ static bool ggml_metal_encode_node(
16381630 // heap buffers for temporary data
16391631 id <MTLBuffer > h_src0 = nil ;
16401632
1633+ // always allocate buffers from the start of the heap for the current node
1634+ heap->offs = 0 ;
1635+
16411636 switch (dst->op ) {
16421637 case GGML_OP_SOFT_MAX:
16431638 {
16441639 h_src0 = ggml_metal_heap_alloc (heap, ggml_nbytes (src0), no_alloc);
16451640 if (!no_alloc && !h_src0) {
1646- GGML_LOG_ERROR (" %s : failed to allocate buffer, idx = %4d , size = %8zu , need = %8zu , max available = %9zu , heap size = %9zu , heap used = %zu , fail = %d \n " ,
1647- __func__, idx, ggml_nbytes (src0), heap->need , [heap->obj maxAvailableSizeWithAlignment: 0 ], [heap->obj size ], [heap->obj usedSize ], heap->fail );
1641+ GGML_LOG_ERROR (" %s : failed to allocate buffer, idx = %4d , size = %8zu , offs = %8zu , max available = %9zu , heap size = %9zu , heap used = %zu , fail = %d \n " ,
1642+ __func__, idx, ggml_nbytes (src0), heap->offs , [heap->obj maxAvailableSizeWithAlignment: 0 ], [heap->obj size ], [heap->obj usedSize ], heap->fail );
16481643 return false ;
16491644 }
16501645 } break ;
@@ -2250,8 +2245,6 @@ static bool ggml_metal_encode_node(
22502245 {
22512246 GGML_ASSERT (!src1 || src1->type == GGML_TYPE_F16 || src1->type == GGML_TYPE_F32);
22522247
2253- GGML_ASSERT (ggml_is_contiguous (src0));
2254-
22552248 int nth = 32 ; // SIMD width
22562249
22572250 id <MTLComputePipelineState > pipeline = nil ;
@@ -4836,6 +4829,12 @@ static enum ggml_status ggml_metal_graph_compute(
48364829 [next_buffer commit ];
48374830 }
48384831
4832+ for (int i = 0 ; i <= n_cb; ++i) {
4833+ struct ggml_metal_heap * heap = ctx->cmd_bufs [i].heap ;
4834+
4835+ [heap->obj setPurgeableState: MTLPurgeableStateEmpty ];
4836+ }
4837+
48394838 if (!should_capture && ctx->capture_started ) {
48404839 [ctx->capture_scope endScope ];
48414840 [[MTLCaptureManager sharedCaptureManager ] stopCapture ];
@@ -5233,6 +5232,8 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
52335232 }
52345233 }
52355234
5235+ // GGML_LOG_INFO("XXXXXXXXXXXXXXXXXXXXXXXXX\n");
5236+
52365237 if (can_compute) {
52375238 for (int idx = node_start; idx < node_end; ++idx) {
52385239 if (should_capture) {
0 commit comments