@@ -22,7 +22,7 @@ extern "C" {
2222 size_t (* get_max_size ) (ggml_backend_buffer_type_t buft );
2323 // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
2424 size_t (* get_alloc_size )(ggml_backend_buffer_type_t buft , const struct ggml_tensor * tensor );
25- // (optional) check if tensor data is in host memory (defaults to false)
25+ // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
2626 bool (* is_host ) (ggml_backend_buffer_type_t buft );
2727 };
2828
@@ -37,7 +37,6 @@ extern "C" {
3737 //
3838
3939 struct ggml_backend_buffer_i {
40- const char * (* get_name ) (ggml_backend_buffer_t buffer );
4140 // (optional) free the buffer
4241 void (* free_buffer ) (ggml_backend_buffer_t buffer );
4342 // base address of the buffer
@@ -88,19 +87,16 @@ extern "C" {
8887
8988 void (* free )(ggml_backend_t backend );
9089
91- // Will be moved to the device interface
92- // buffer allocation
93- ggml_backend_buffer_type_t (* get_default_buffer_type )(ggml_backend_t backend );
94-
9590 // (optional) asynchronous tensor data access
9691 void (* set_tensor_async )(ggml_backend_t backend , struct ggml_tensor * tensor , const void * data , size_t offset , size_t size );
9792 void (* get_tensor_async )(ggml_backend_t backend , const struct ggml_tensor * tensor , void * data , size_t offset , size_t size );
9893 bool (* cpy_tensor_async )(ggml_backend_t backend_src , ggml_backend_t backend_dst , const struct ggml_tensor * src , struct ggml_tensor * dst );
9994
100- // (optional) complete all pending operations
95+ // (optional) complete all pending operations (required if the backend supports async operations)
10196 void (* synchronize )(ggml_backend_t backend );
10297
103- // (optional) compute graph with a plan (not used currently)
98+ // (optional) graph plans
99+ // compute graph with a plan (not used currently)
104100 ggml_backend_graph_plan_t (* graph_plan_create ) (ggml_backend_t backend , const struct ggml_cgraph * cgraph );
105101 void (* graph_plan_free ) (ggml_backend_t backend , ggml_backend_graph_plan_t plan );
106102 // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
@@ -111,13 +107,6 @@ extern "C" {
111107 // compute graph (always async if supported by the backend)
112108 enum ggml_status (* graph_compute ) (ggml_backend_t backend , struct ggml_cgraph * cgraph );
113109
114- // IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
115- // new backends should implement the device interface instead
116- // These functions are being moved to the device interface
117- bool (* supports_op ) (ggml_backend_t backend , const struct ggml_tensor * op );
118- bool (* supports_buft )(ggml_backend_t backend , ggml_backend_buffer_type_t buft );
119- bool (* offload_op ) (ggml_backend_t backend , const struct ggml_tensor * op );
120-
121110 // (optional) event synchronization
122111 // record an event on this stream
123112 void (* event_record )(ggml_backend_t backend , ggml_backend_event_t event );
0 commit comments