4 files changed: 458 additions, 0 deletions

CMakeLists.txt
@@ -2,6 +2,28 @@ set(TARGET mtl-export)
 add_executable(${TARGET} mtl-export.cpp)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
+
 if (TARGET BUILD_INFO)
     add_dependencies(${TARGET} BUILD_INFO)
 endif()
+
+if (APPLE)
+    #
+    # mtl
+
+    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
+    find_library(METAL_FRAMEWORK Metal REQUIRED)
+    find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
+    find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
+
+    set(TEST_TARGET mtl)
+    add_executable(${TEST_TARGET} mtl.cpp mtl.h mtl.m)
+    target_link_libraries(${TEST_TARGET} PRIVATE
+        ggml
+        ${FOUNDATION_LIBRARY}
+        ${METAL_FRAMEWORK}
+        ${METALKIT_FRAMEWORK}
+        ${METALPERFORMANCE_FRAMEWORK}
+    )
+endif()
+
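The existing `mtl-export` target (whose CMakeLists this hunk extends) is what produces the serialized graph file, e.g. `llama.ggml`, that the new `mtl` binary below consumes; its source is not part of this excerpt. As a rough, hypothetical illustration of how such a file can be written with ggml's graph export API of that era (`ggml_build_forward` / `ggml_graph_export`), the toy add-graph below stands in for the real LLaMA graph:

```cpp
// Hypothetical illustration only: builds a trivial graph and exports it in the
// format that ggml_graph_import() in mtl.cpp expects. The real mtl-export tool
// exports the full LLaMA computation graph instead.
#include "ggml.h"

#include <cstddef>

int main() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };

    struct ggml_context * ctx = ggml_init(params);

    // two small inputs and one op - stand-ins for the actual model graph
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    ggml_set_f32(a, 1.0f);
    ggml_set_f32(b, 2.0f);

    struct ggml_tensor * c = ggml_add(ctx, a, b);
    ggml_set_name(c, "output"); // naming the result makes it easy to look up after import

    struct ggml_cgraph gf = ggml_build_forward(c);

    // write the graph (structure + tensor data) to disk for later import
    ggml_graph_export(&gf, "test.ggml");

    ggml_free(ctx);

    return 0;
}
```

The resulting `test.ggml` could be fed to the `mtl` binary below, although the graph here carries no real model.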
mtl.cpp (new file)
+#include "ggml.h"
+#include "mtl.h"
+
+#include <cstdio>
+#include <cstring>
+#include <cstdlib>
+
+int main(int argc, char ** argv) {
+    ggml_time_init();
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s llama.ggml\n", argv[0]);
+        return -1;
+    }
+
+    const char * fname_cgraph = argv[1];
+
+    // load the compute graph
+    struct ggml_context * ctx_data = NULL;
+    struct ggml_context * ctx_eval = NULL;
+
+    struct ggml_cgraph gf = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
+    gf.n_threads = 1;
+
+    // allocate work context
+    static size_t buf_size = gf.work_size; // TODO
+    static void * buf = malloc(buf_size);
+
+    struct ggml_init_params params = {
+        /*.mem_size   =*/ buf_size,
+        /*.mem_buffer =*/ buf,
+        /*.no_alloc   =*/ false,
+    };
+
+    struct ggml_context * ctx_work = ggml_init(params);
+
+    // this allocates all Metal resources and memory buffers
+    auto * ctx_mtl = llama_mtl_init(ctx_data, ctx_eval, ctx_work, &gf);
+
+    // the actual inference happens here
+    llama_mtl_eval(ctx_mtl, &gf);
+
+    llama_mtl_free(ctx_mtl);
+
+    ggml_free(ctx_work);
+    ggml_free(ctx_data);
+    ggml_free(ctx_eval);
+
+    return 0;
+}
+
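`mtl.cpp` ignores the value returned by `llama_mtl_eval()` (documented in `mtl.h` below as 0 on success) and never inspects the evaluated tensors. A hedged sketch of what the call site could look like instead, assuming the Metal backend copies results back into the imported graph and that the output tensor was given a name before export (the name `"output"` here is hypothetical):

```cpp
// Hypothetical replacement for the bare llama_mtl_eval() call in main() above.
// Assumes results are written back into the imported tensors and that the
// output tensor was named "output" (via ggml_set_name) before export.
if (llama_mtl_eval(ctx_mtl, &gf) != 0) {
    fprintf(stderr, "%s: llama_mtl_eval() failed\n", __func__);
    return 1;
}

struct ggml_tensor * out = ggml_graph_get_tensor(&gf, "output");
if (out != NULL) {
    // print the first value as a basic sanity check
    fprintf(stderr, "output[0] = %8.4f\n", ggml_get_f32_1d(out, 0));
}
```

Looking the tensor up by name in the graph avoids the caller having to know whether it lives in `ctx_data` or `ctx_eval`.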
mtl.h (new file)
+#pragma once
+
+struct ggml_context;
+struct ggml_cgraph;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ggml_mtl_context;
+
+struct ggml_mtl_context * llama_mtl_init(
+    struct ggml_context * ctx_data,
+    struct ggml_context * ctx_eval,
+    struct ggml_context * ctx_work,
+    struct ggml_cgraph  * gf);
+
+void llama_mtl_free(struct ggml_mtl_context * ctx);
+
+// return 0 on success
+int llama_mtl_eval(
+    struct ggml_mtl_context * ctx,
+    struct ggml_cgraph      * gf);
+
+#ifdef __cplusplus
+}
+#endif
+
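The header keeps a plain C interface (`extern "C"` plus an opaque `struct ggml_mtl_context`) so the implementation can live in Objective-C (`mtl.m`, not shown in this excerpt) while the driver stays in C++. The PR's actual `mtl.m` is not reproduced here; the following is only a hypothetical no-op stub that satisfies the interface, e.g. to keep the example compiling where Metal is unavailable:

```cpp
// Hypothetical stub - NOT the PR's mtl.m. It only exercises the C ABI declared
// in mtl.h; every call succeeds without touching Metal.
#include "mtl.h"

#include <cstdio>

struct ggml_mtl_context {
    // a real implementation would hold the Metal device, command queue and
    // buffers mapped onto ctx_data / ctx_eval / ctx_work here
    int n_eval = 0;
};

struct ggml_mtl_context * llama_mtl_init(
        struct ggml_context * /*ctx_data*/,
        struct ggml_context * /*ctx_eval*/,
        struct ggml_context * /*ctx_work*/,
        struct ggml_cgraph  * /*gf*/) {
    return new ggml_mtl_context();
}

void llama_mtl_free(struct ggml_mtl_context * ctx) {
    delete ctx;
}

int llama_mtl_eval(struct ggml_mtl_context * ctx, struct ggml_cgraph * /*gf*/) {
    ctx->n_eval++;
    fprintf(stderr, "stub: pretending to evaluate the graph (call #%d)\n", ctx->n_eval);
    return 0; // the header documents 0 as success
}
```

Because the declarations in `mtl.h` already carry C linkage, the definitions above pick it up automatically when compiled as C++.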