@@ -4071,8 +4071,8 @@ bool ggml_is_numa(void) {
 ////////////////////////////////////////////////////////////////////////////////
 
 void ggml_print_object(const struct ggml_object * obj) {
-    GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
-            obj->offs, obj->size, (const void *) obj->next);
+    GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
+            obj->type, obj->offs, obj->size, (const void *) obj->next);
 }
 
 void ggml_print_objects(const struct ggml_context * ctx) {
@@ -4212,7 +4212,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
 }
 
 size_t ggml_tensor_overhead(void) {
-    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16;
+    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
 }
 
 bool ggml_is_transposed(const struct ggml_tensor * tensor) {
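
With alignment now applied centrally in ggml_new_object (see the hunk at 4509 below), the extra 16 bytes of slack in ggml_tensor_overhead() are dropped. For context, a minimal sketch of how this helper is typically used to budget a metadata-only context (the tensor count of 1024 is an arbitrary example, not from this patch):

    // Reserve room for up to 1024 tensor headers; with no_alloc = true the
    // context stores only metadata, so ggml_tensor_overhead() per tensor suffices.
    struct ggml_init_params params = {
        /*.mem_size   =*/ 1024*ggml_tensor_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);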
@@ -4383,7 +4383,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         return NULL;
     }
 
-    const size_t mem_size = (params.mem_size + GGML_MEM_ALIGN - 1) & ~(GGML_MEM_ALIGN - 1);
+    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
 
     *ctx = (struct ggml_context) {
         /*.mem_size =*/ mem_size,
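
ggml_init now only pads mem_size when it allocates the buffer itself; a caller-provided mem_buffer is taken at face value. GGML_PAD rounds up to a multiple of a power-of-two alignment; judging by the expression it replaces here, its definition in ggml.h is along these lines (shown for context, not part of this diff):

    // Round x up to the next multiple of n (n must be a power of two).
    #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
    // e.g. GGML_PAD(100, 16) == 112, and GGML_PAD(112, 16) == 112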
@@ -4472,12 +4472,14 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
     struct ggml_object * obj = ctx->objects_begin;
 
     while (obj != NULL) {
-        struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
 
-        const size_t size = ggml_nbytes(tensor);
+            const size_t size = ggml_nbytes(tensor);
 
-        if (max_size < size) {
-            max_size = size;
+            if (max_size < size) {
+                max_size = size;
+            }
         }
 
         obj = obj->next;
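
Since the pool can now hold graph and work-buffer objects alongside tensors, walkers like ggml_get_max_tensor_size() must filter on obj->type. The corresponding header additions (not shown in this hunk) look roughly like the following, with the field order inferred from the designated initializer in ggml_new_object; the exact declaration (e.g. any trailing padding) may differ:

    enum ggml_object_type {
        GGML_OBJECT_TENSOR,
        GGML_OBJECT_GRAPH,
        GGML_OBJECT_WORK_BUFFER,
    };

    struct ggml_object {
        size_t offs;                // payload offset within the context buffer
        size_t size;                // payload size, padded to GGML_MEM_ALIGN
        struct ggml_object * next;  // next object in the pool's linked list
        enum ggml_object_type type; // tensor, graph, or work buffer
    };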
@@ -4509,90 +4511,87 @@ static void ggml_scratch_load(struct ggml_context * ctx) {
 
 ////////////////////////////////////////////////////////////////////////////////
 
-static struct ggml_tensor * ggml_new_tensor_impl(
-        struct ggml_context * ctx,
-        enum ggml_type type,
-        int n_dims,
-        const int64_t* ne,
-        void* data) {
+static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
     // always insert objects at the end of the context's memory pool
     struct ggml_object * obj_cur = ctx->objects_end;
 
     const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
     const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
     const size_t cur_end  = cur_offs + cur_size;
 
-    size_t size_needed = 0;
-
-    if (data == NULL && !ctx->no_alloc) {
-        size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
-        for (int i = 1; i < n_dims; i++) {
-            size_needed *= ne[i];
-        }
-        // align to GGML_MEM_ALIGN
-        size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
-    }
+    // align to GGML_MEM_ALIGN
+    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
 
     char * const mem_buffer = ctx->mem_buffer;
     struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
 
-    if (ctx->scratch.data == NULL || data != NULL) {
-        size_needed += GGML_TENSOR_SIZE;
+    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
+        GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                __func__, cur_end + size_needed, ctx->mem_size);
+        assert(false);
+        return NULL;
+    }
 
-        if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
-            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-                    __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
-            assert(false);
-            return NULL;
-        }
+    *obj_new = (struct ggml_object) {
+        .offs = cur_end + GGML_OBJECT_SIZE,
+        .size = size_needed,
+        .next = NULL,
+        .type = type,
+    };
 
-        *obj_new = (struct ggml_object) {
-            .offs = cur_end + GGML_OBJECT_SIZE,
-            .size = size_needed,
-            .next = NULL,
-        };
+    ggml_assert_aligned(mem_buffer + obj_new->offs);
+
+    if (obj_cur != NULL) {
+        obj_cur->next = obj_new;
     } else {
-        if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
-            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
-                    __func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
-            assert(false);
-            return NULL;
+        // this is the first object in this context
+        ctx->objects_begin = obj_new;
+    }
+
+    ctx->objects_end = obj_new;
+
+    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
+
+    return obj_new;
+}
+
+static struct ggml_tensor * ggml_new_tensor_impl(
+        struct ggml_context * ctx,
+        enum ggml_type type,
+        int n_dims,
+        const int64_t* ne,
+        void* data) {
+
+    size_t data_size = 0;
+
+    if (data == NULL && !ctx->no_alloc) {
+        data_size += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
+        for (int i = 1; i < n_dims; i++) {
+            data_size *= ne[i];
         }
+    }
 
-        if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
-            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-                    __func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
+    if (ctx->scratch.data != NULL && data == NULL) {
+        // allocate tensor data in the scratch buffer
+        if (ctx->scratch.offs + data_size > ctx->scratch.size) {
+            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+                    __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
             assert(false);
             return NULL;
         }
 
         data = (char * const) ctx->scratch.data + ctx->scratch.offs;
 
-        *obj_new = (struct ggml_object) {
-            .offs = cur_end + GGML_OBJECT_SIZE,
-            .size = GGML_TENSOR_SIZE,
-            .next = NULL,
-        };
-
-        //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
+        ctx->scratch.offs += data_size;
 
-        ctx->scratch.offs += size_needed;
+        data_size = 0;
     }
 
-    if (obj_cur != NULL) {
-        obj_cur->next = obj_new;
-    } else {
-        // this is the first object in this context
-        ctx->objects_begin = obj_new;
-    }
-
-    ctx->objects_end = obj_new;
-
-    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
 
-    struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offs);
+    // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
-    ggml_assert_aligned(result);
+    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
 
     *result = (struct ggml_tensor) {
         /*.type =*/ type,
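
The net effect of the refactor above: ggml_new_object now owns the generic pool bookkeeping (capacity check, alignment, linked-list insertion), while ggml_new_tensor_impl only computes data_size and decides whether the data lives inline or in the scratch buffer. A conceptual sketch of one pool entry, reconstructed from this hunk rather than taken from the source:

    // mem_buffer + cur_end
    // v
    // [ struct ggml_object ][ payload: GGML_PAD(size, GGML_MEM_ALIGN) bytes ]
    //                        ^ obj->offs points here
    //
    // For GGML_OBJECT_TENSOR the payload is the ggml_tensor struct followed by
    // its data; if the data was carved from the scratch buffer instead,
    // data_size is zeroed first, so the payload is the struct alone.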
@@ -5026,9 +5025,11 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-        struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
-        if (strcmp(cur->name, name) == 0) {
-            return cur;
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
+            if (strcmp(cur->name, name) == 0) {
+                return cur;
+            }
         }
 
         obj = obj->next;
@@ -15829,6 +15830,35 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
     return result;
 }
 
+struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
+    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
+
+    *cgraph = (struct ggml_cgraph) {
+        /*.n_nodes      =*/ 0,
+        /*.n_leafs      =*/ 0,
+        /*.nodes        =*/ { NULL },
+        /*.grads        =*/ { NULL },
+        /*.leafs        =*/ { NULL },
+        /*.hash_table   =*/ { NULL },
+        /*.perf_runs    =*/ 0,
+        /*.perf_cycles  =*/ 0,
+        /*.perf_time_us =*/ 0,
+    };
+
+    return cgraph;
+}
+
+struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
+    struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
+    ggml_build_forward_impl(cgraph, tensor, false);
+    return cgraph;
+}
+
+size_t ggml_graph_overhead(void) {
+    return GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN);
+}
+
 //
 // thread data
 //
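
A usage sketch of the new graph API (my example, not from the patch): tensors and the forward graph share one context, with ggml_graph_overhead() budgeted on top of an arbitrary tensor/data allowance:

    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_graph_overhead() + 16*1024*1024, // example budget
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

    // the graph itself is allocated inside ctx as a GGML_OBJECT_GRAPH
    struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx, ggml_add(ctx, a, b));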
@@ -16544,10 +16574,9 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
 void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
 
-    struct ggml_tensor * buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size);
-    GGML_ASSERT(buf);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
 
-    cplan.work_data = buf->data;
+    cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
 
     ggml_graph_compute(cgraph, &cplan);
 }
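
The work buffer is now a GGML_OBJECT_WORK_BUFFER carved from the same context rather than an I8 tensor, so the context's mem_size must also cover cplan.work_size (plus GGML_OBJECT_SIZE and alignment padding). Continuing the sketch above:

    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 4);
    ggml_free(ctx);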