 #define LLAMA_SESSION_MAGIC    LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION  1

+#define LLAMA_DEFAULT_SEED     0xFFFFFFFF
+
 #if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
 // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
 #define LLAMA_SUPPORTS_GPU_OFFLOAD
@@ -81,11 +83,11 @@ extern "C" { |
     typedef void (*llama_progress_callback)(float progress, void *ctx);

     struct llama_context_params {
-        int seed;                              // RNG seed, -1 for random
-        int n_ctx;                             // text context
-        int n_batch;                           // prompt processing batch size
-        int n_gpu_layers;                      // number of layers to store in VRAM
-        int main_gpu;                          // the GPU that is used for scratch and small tensors
+        uint32_t seed;                         // RNG seed, -1 for random
+        int32_t  n_ctx;                        // text context
+        int32_t  n_batch;                      // prompt processing batch size
+        int32_t  n_gpu_layers;                 // number of layers to store in VRAM
+        int32_t  main_gpu;                     // the GPU that is used for scratch and small tensors
         float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
         // called with a progress value between 0 and 1, pass NULL to disable
         llama_progress_callback progress_callback;
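As a usage sketch (not part of the patch), the narrowed field types and the new macro would be filled in roughly like this, assuming the llama_context_default_params() helper declared in the same header; note that assigning -1 to the uint32_t seed still wraps to 0xFFFFFFFF, i.e. LLAMA_DEFAULT_SEED:

    #include "llama.h"

    // Sketch only: populate the context parameters using the new fixed-width types.
    // llama_context_default_params() is the default-initializer from this same header.
    static struct llama_context_params make_params(void) {
        struct llama_context_params params = llama_context_default_params();

        params.seed         = LLAMA_DEFAULT_SEED; // 0xFFFFFFFF -> pick a random seed
        params.n_ctx        = 2048;               // int32_t: text context size
        params.n_batch      = 512;                // int32_t: prompt processing batch size
        params.n_gpu_layers = 0;                  // int32_t: layers kept in VRAM
        params.main_gpu     = 0;                  // int32_t: GPU for scratch/small tensors

        return params;
    }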
@@ -196,7 +198,7 @@ extern "C" { |
     LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);

     // Sets the current rng seed.
-    LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, int seed);
+    LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, uint32_t seed);

     // Returns the maximum size in bytes of the state (rng, logits, embedding
     // and kv_cache) - will often be smaller after compacting tokens
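Similarly, a minimal sketch of re-seeding an existing context through the updated unsigned signature; passing LLAMA_DEFAULT_SEED is assumed to request a fresh non-deterministic seed, mirroring the comment on the params field:

    #include "llama.h"

    // Sketch only: `ctx` is assumed to be a valid llama_context created elsewhere.
    static void reseed_example(struct llama_context * ctx) {
        llama_set_rng_seed(ctx, 42u);                // fixed seed -> reproducible sampling
        // ... sample some tokens ...
        llama_set_rng_seed(ctx, LLAMA_DEFAULT_SEED); // assumed: back to a random seed
    }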