
Commit 6045c5a

cont : put all buffers in the same virtual address space
ggml-ci
1 parent 626fa1d commit 6045c5a
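
For context, the diff below separates the host allocation (all_data, used only by shared buffers) from a per-buffer virtual base address (base_addr). Shared and private buffers reserve their base address from a single process-wide counter, g_addr_device, so both kinds end up in one common virtual address space; mapped buffers keep their host pointer as the base (see the note in the last hunk). A minimal sketch of that carving pattern in plain C follows; the helper name alloc_virtual_base is hypothetical and only illustrates what the diff does inline in ggml_backend_metal_buffer_type_alloc_buffer.

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

// process-wide counter: each buffer reserves a unique, non-overlapping
// [base, base + size_aligned) range, regardless of where its bytes actually live
static atomic_uintptr_t g_addr_device = 0x000000400ULL;

// hypothetical helper - the diff performs this fetch-add inline
static void * alloc_virtual_base(size_t size_aligned) {
    // relaxed ordering suffices: only uniqueness of the returned range matters
    return (void *) atomic_fetch_add_explicit(&g_addr_device, size_aligned, memory_order_relaxed);
}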

File tree

1 file changed: +39 -18

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 39 additions & 18 deletions
@@ -43,6 +43,9 @@
 static struct ggml_backend_reg g_ggml_backend_metal_reg;
 static struct ggml_backend_device g_ggml_backend_metal_device;
 
+// virtual address for GPU memory allocations
+static atomic_uintptr_t g_addr_device = 0x000000400ULL;
+
 // information about a Metal device
 // note: assumes single GPU device - the default one
 // TODO: support multiple GPU devices
@@ -1787,9 +1790,11 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
 };
 
 struct ggml_backend_metal_buffer_context {
-    void * all_data;
+    void * all_data; // for shared buffers
     size_t all_size;
 
+    void * base_addr;
+
     // if false, the Metal buffer data is allocated in private GPU memory and is not shared with the host
     bool is_shared;
 
@@ -6035,33 +6040,42 @@ static void ggml_backend_metal_buffer_shared_free_buffer(ggml_backend_buffer_t b
 }
 
 static void * ggml_backend_metal_buffer_shared_get_base(ggml_backend_buffer_t buffer) {
-    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;
+    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
 
-    return ctx->all_data;
+    return ctx->base_addr;
 }
 
 static void ggml_backend_metal_buffer_shared_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
-    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;
+    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *) tensor->buffer->context;
 
     GGML_ASSERT(ctx->is_shared);
 
-    memset((char *)tensor->data + offset, value, size);
+    const ptrdiff_t base_offset = (char *)tensor->data - (char *)ctx->base_addr;
+    memset((char *) ctx->all_data + base_offset + offset, value, size);
+
+    GGML_UNUSED(buffer);
 }
 
 static void ggml_backend_metal_buffer_shared_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
-    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;
+    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *) tensor->buffer->context;
 
     GGML_ASSERT(ctx->is_shared);
 
-    memcpy((char *)tensor->data + offset, data, size);
+    const ptrdiff_t base_offset = (char *)tensor->data - (char *)ctx->base_addr;
+    memcpy((char *) ctx->all_data + base_offset + offset, data, size);
+
+    GGML_UNUSED(buffer);
 }
 
 static void ggml_backend_metal_buffer_shared_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
-    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;
+    struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *) tensor->buffer->context;
 
     GGML_ASSERT(ctx->is_shared);
 
-    memcpy(data, (const char *)tensor->data + offset, size);
+    const ptrdiff_t base_offset = (char *)tensor->data - (char *)ctx->base_addr;
+    memcpy(data, (const char *) ctx->all_data + base_offset + offset, size);
+
+    GGML_UNUSED(buffer);
 }
 
 static bool ggml_backend_metal_buffer_shared_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
@@ -6111,7 +6125,7 @@ static void ggml_backend_metal_buffer_private_free_buffer(ggml_backend_buffer_t
 static void * ggml_backend_metal_buffer_private_get_base(ggml_backend_buffer_t buffer) {
     struct ggml_backend_metal_buffer_context * ctx = (struct ggml_backend_metal_buffer_context *)buffer->context;
 
-    return ctx->all_data;
+    return ctx->base_addr;
 }
 
 static void ggml_backend_metal_buffer_private_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
@@ -6329,14 +6343,14 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
         ctx->all_data = ggml_metal_host_malloc(size_aligned);
         ctx->is_shared = true;
     } else {
-        // virtual address for GPU memory allocations
-        static atomic_uintptr_t addr_device = 0x000000400ULL;
-
-        ctx->all_data = (void *) atomic_fetch_add_explicit(&addr_device, size_aligned, memory_order_relaxed);
+        // dummy, non-NULL value - not used
+        ctx->all_data = (void *) 0x000000400ULL;
         ctx->is_shared = false;
     }
     ctx->all_size = size_aligned;
 
+    ctx->base_addr = (void *) atomic_fetch_add_explicit(&g_addr_device, size_aligned, memory_order_relaxed);
+
     ctx->device = device;
     ctx->queue = ctx_dev->mtl_queue;
 
@@ -6347,7 +6361,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
     ctx->buffers[0].metal = nil;
 
     if (size_aligned > 0) {
-        if (ctx_dev->use_shared_buffers) {
+        if (ctx_dev->use_shared_buffers && shared) {
             ctx->buffers[0].metal = [device newBufferWithBytesNoCopy:ctx->all_data
                                                               length:size_aligned
                                                              options:MTLResourceStorageModeShared
@@ -6362,7 +6376,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
             }
         }
 
-        ctx->buffers[0].data = ctx->all_data;
+        ctx->buffers[0].data = ctx->base_addr;
     }
 
     if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
@@ -6963,6 +6977,13 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen
         size_aligned += (size_page - (size_aligned % size_page));
     }
 
+    // note: I think this is preferred because we want to have both the mapped and non-mapped buffers in the same
+    // address space. not sure if there are any side-effects from this though.
+    //ctx->base_addr = (void *) atomic_fetch_add_explicit(&g_addr_device, size_aligned, memory_order_relaxed);
+    //
+    // note2: the above does not actually work
+    ctx->base_addr = ptr;
+
     struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;
 
     GGML_ASSERT(ctx_dev->mtl_device != nil);
@@ -6974,7 +6995,7 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen
 
     // the buffer fits into the max buffer size allowed by the device
     if (size_aligned <= device.maxBufferLength) {
-        ctx->buffers[ctx->n_buffers].data = ptr;
+        ctx->buffers[ctx->n_buffers].data = ctx->base_addr;
         ctx->buffers[ctx->n_buffers].size = size;
         ctx->buffers[ctx->n_buffers].metal = nil;
 
@@ -7000,7 +7021,7 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen
         for (size_t i = 0; i < size; i += size_step) {
             const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i);
 
-            ctx->buffers[ctx->n_buffers].data = (void *) ((uint8_t *) ptr + i);
+            ctx->buffers[ctx->n_buffers].data = (void *) ((uint8_t *) ctx->base_addr + i);
             ctx->buffers[ctx->n_buffers].size = size_step_aligned;
             ctx->buffers[ctx->n_buffers].metal = nil;

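Since tensor->data now holds a virtual address rather than a host pointer, the shared-buffer memset/set/get paths in the diff translate it back to the real allocation: they measure the tensor's distance from base_addr and apply that offset to all_data. A hedged sketch of that translation, reusing the field names from the diff (the struct and helper below are simplified stand-ins, not the actual ggml types):

#include <stddef.h>

struct buffer_ctx {      // simplified stand-in for ggml_backend_metal_buffer_context
    void * all_data;     // host memory backing a shared buffer
    void * base_addr;    // virtual base reserved from g_addr_device
};

// hypothetical helper: map a tensor's virtual data pointer back to host memory
static void * host_ptr(const struct buffer_ctx * ctx, const void * tensor_data, size_t offset) {
    // distance of the tensor from its buffer's virtual base ...
    const ptrdiff_t base_offset = (const char *) tensor_data - (const char *) ctx->base_addr;
    // ... applied to the real host allocation
    return (char *) ctx->all_data + base_offset + offset;
}

With such a helper, the set_tensor body above would reduce to memcpy(host_ptr(ctx, tensor->data, offset), data, size), which is the same arithmetic the diff performs inline.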