 static struct ggml_backend_reg g_ggml_backend_metal_reg;
 static struct ggml_backend_device g_ggml_backend_metal_device;

+// virtual address for GPU memory allocations
+static atomic_uintptr_t g_addr_device = 0x000000400ULL;
+
 // information about a Metal device
 // note: assumes single GPU device - the default one
 // TODO: support multiple GPU devices
@@ -1787,9 +1790,11 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
 };

 struct ggml_backend_metal_buffer_context {
-    void * all_data;
+    void * all_data; // for shared buffers
     size_t all_size;

+    void * base_addr;
+
     // if false, the Metal buffer data is allocated in private GPU memory and is not shared with the host
     bool is_shared;
@@ -6035,33 +6040,42 @@ static void ggml_backend_metal_buffer_shared_free_buffer(ggml_backend_buffer_t b
 }

 static void * ggml_backend_metal_buffer_shared_get_base(ggml_backend_buffer_t buffer) {
-    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;
+    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;

-    return ctx->all_data;
+    return ctx->base_addr;
 }

 static void ggml_backend_metal_buffer_shared_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
-    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;
+    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)tensor->buffer->context;

     GGML_ASSERT(ctx->is_shared);

-    memset((char *)tensor->data + offset, value, size);
+    const ptrdiff_t base_offset = (char *)tensor->data - (char *)ctx->base_addr;
+    memset((char *)ctx->all_data + base_offset + offset, value, size);
+
+    GGML_UNUSED(buffer);
 }

 static void ggml_backend_metal_buffer_shared_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
-    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;
+    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)tensor->buffer->context;

     GGML_ASSERT(ctx->is_shared);

-    memcpy((char *)tensor->data + offset, data, size);
+    const ptrdiff_t base_offset = (char *)tensor->data - (char *)ctx->base_addr;
+    memcpy((char *)ctx->all_data + base_offset + offset, data, size);
+
+    GGML_UNUSED(buffer);
 }

 static void ggml_backend_metal_buffer_shared_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
-    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;
+    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)tensor->buffer->context;

     GGML_ASSERT(ctx->is_shared);

-    memcpy(data, (const char *)tensor->data + offset, size);
+    const ptrdiff_t base_offset = (char *)tensor->data - (char *)ctx->base_addr;
+    memcpy(data, (const char *)ctx->all_data + base_offset + offset, size);
+
+    GGML_UNUSED(buffer);
 }

 static bool ggml_backend_metal_buffer_shared_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
@@ -6111,7 +6125,7 @@ static void ggml_backend_metal_buffer_private_free_buffer(ggml_backend_buffer_t
 static void * ggml_backend_metal_buffer_private_get_base(ggml_backend_buffer_t buffer) {
     struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;

-    return ctx->all_data;
+    return ctx->base_addr;
 }

 static void ggml_backend_metal_buffer_private_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
@@ -6329,14 +6343,14 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
         ctx->all_data = ggml_metal_host_malloc(size_aligned);
         ctx->is_shared = true;
     } else {
-        // virtual address for GPU memory allocations
-        static atomic_uintptr_t addr_device = 0x000000400ULL;
-
-        ctx->all_data = (void *) atomic_fetch_add_explicit(&addr_device, size_aligned, memory_order_relaxed);
+        // dummy, non-NULL value - not used
+        ctx->all_data = (void *) 0x000000400ULL;
         ctx->is_shared = false;
     }
     ctx->all_size = size_aligned;

+    ctx->base_addr = (void *) atomic_fetch_add_explicit(&g_addr_device, size_aligned, memory_order_relaxed);
+
     ctx->device = device;
     ctx->queue = ctx_dev->mtl_queue;
@@ -6347,7 +6361,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
     ctx->buffers[0].metal = nil;

     if (size_aligned > 0) {
-        if (ctx_dev->use_shared_buffers) {
+        if (ctx_dev->use_shared_buffers && shared) {
             ctx->buffers[0].metal = [device newBufferWithBytesNoCopy:ctx->all_data
                                                               length:size_aligned
                                                              options:MTLResourceStorageModeShared
@@ -6362,7 +6376,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
             }
         }

-        ctx->buffers[0].data = ctx->all_data;
+        ctx->buffers[0].data = ctx->base_addr;
     }

     if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
@@ -6963,6 +6977,13 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen
         size_aligned += (size_page - (size_aligned % size_page));
     }

+    // note: I think this is preferred because we want to have both the mapped and non-mapped buffers in the same
+    //       address space. not sure if there are any side-effects from this though.
+    //ctx->base_addr = (void *) atomic_fetch_add_explicit(&g_addr_device, size_aligned, memory_order_relaxed);
+    //
+    // note2: the above does not actually work
+    ctx->base_addr = ptr;
+
     struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;

     GGML_ASSERT(ctx_dev->mtl_device != nil);
@@ -6974,7 +6995,7 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen

     // the buffer fits into the max buffer size allowed by the device
     if (size_aligned <= device.maxBufferLength) {
-        ctx->buffers[ctx->n_buffers].data = ptr;
+        ctx->buffers[ctx->n_buffers].data = ctx->base_addr;
         ctx->buffers[ctx->n_buffers].size = size;
         ctx->buffers[ctx->n_buffers].metal = nil;

@@ -7000,7 +7021,7 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen
         for (size_t i = 0; i < size; i += size_step) {
             const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i);

-            ctx->buffers[ctx->n_buffers].data = (void *) ((uint8_t *) ptr + i);
+            ctx->buffers[ctx->n_buffers].data = (void *) ((uint8_t *) ctx->base_addr + i);
             ctx->buffers[ctx->n_buffers].size = size_step_aligned;
             ctx->buffers[ctx->n_buffers].metal = nil;

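
A minimal standalone sketch of the address-translation scheme introduced above, assuming simplified stand-in types (buffer_ctx, buffer_init, and buffer_set are illustrative names, not the actual ggml API): every buffer receives a unique virtual base address from the global g_addr_device counter, and the shared memset/set/get paths recover the host storage by taking the tensor pointer's offset from that base.

// sketch only: simplified stand-ins for the ggml structures, not the real API
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// global counter handing out non-overlapping virtual device addresses
static atomic_uintptr_t g_addr_device = 0x000000400ULL;

struct buffer_ctx {
    void * all_data;  // host-visible storage (shared buffers only)
    void * base_addr; // virtual base address that tensor->data points into
    size_t all_size;
};

static void buffer_init(struct buffer_ctx * ctx, size_t size) {
    ctx->all_size  = size;
    ctx->all_data  = malloc(size); // stands in for ggml_metal_host_malloc
    ctx->base_addr = (void *) atomic_fetch_add_explicit(&g_addr_device, size, memory_order_relaxed);
}

// translate a tensor's virtual data pointer back into the host allocation before copying,
// mirroring the ggml_backend_metal_buffer_shared_set_tensor change above
static void buffer_set(struct buffer_ctx * ctx, void * tensor_data, const void * src, size_t offset, size_t size) {
    const ptrdiff_t base_offset = (char *) tensor_data - (char *) ctx->base_addr;
    memcpy((char *) ctx->all_data + base_offset + offset, src, size);
}

For private (non-shared) buffers the same base_addr allocation applies, but all_data stays a dummy non-NULL value, since the data never resides in host memory.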