@@ -40,9 +40,9 @@ extern "C" {
     typedef int llama_token;
 
     typedef struct llama_token_data {
-        llama_token id; // token id
-        float logit;    // log-odds of the token
-        float p;        // probability of the token
+        llama_token id; // token id
+        float logit;    // log-odds of the token
+        float p;        // probability of the token
     } llama_token_data;
 
     typedef struct llama_token_data_array {
@@ -73,23 +73,30 @@ extern "C" {
 
     // model file types
     enum llama_ftype {
-        LLAMA_FTYPE_ALL_F32 = 0,
-        LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
+        LLAMA_FTYPE_ALL_F32 = 0,
+        LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
-        // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
-        // LLAMA_FTYPE_MOSTLY_Q4_3 (6) support has been removed
-        LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
+        // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
+        // LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed
+        LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
     };
 
     LLAMA_API struct llama_context_params llama_context_default_params();
 
     LLAMA_API bool llama_mmap_supported();
     LLAMA_API bool llama_mlock_supported();
 
+    // TODO: not great API - very likely to change
+    // Initialize the llama + ggml backend
+    // Call once at the start of the program
+    LLAMA_API void llama_init_backend();
+
+    LLAMA_API int64_t llama_time_us();
+
     // Various functions for loading a ggml llama model.
     // Allocate (almost) all memory needed for the model.
     // Return NULL on failure
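The substantive change in this hunk is the new initialization/timing API. Below is a minimal, hypothetical usage sketch that relies only on the declarations visible in this diff (llama_init_backend(), llama_time_us(), llama_context_default_params(), llama_mmap_supported(), llama_mlock_supported()); the model-loading calls a real program would make next are omitted, and note that the header itself warns llama_init_backend() is "very likely to change":

    #include <stdint.h>
    #include <stdio.h>
    #include "llama.h"

    int main(void) {
        // Initialize the llama + ggml backend once, at the start of the
        // program, as the header comment instructs.
        llama_init_backend();

        // llama_time_us() returns a microsecond timestamp, handy for
        // rough wall-clock measurements.
        const int64_t t_start = llama_time_us();

        // Default context parameters; a real program would pass these to
        // the model-loading functions declared further down in the header.
        struct llama_context_params params = llama_context_default_params();
        (void) params;

        printf("mmap supported:  %d\n", llama_mmap_supported());
        printf("mlock supported: %d\n", llama_mlock_supported());

        printf("startup took %lld us\n", (long long)(llama_time_us() - t_start));
        return 0;
    }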