2 files changed: +0 −13 lines

First changed file (the high-level sampler-chain wrapper):

@@ -685,10 +685,6 @@ def add_dist(self, seed: int):
         sampler = llama_cpp.llama_sampler_init_dist(seed)
         llama_cpp.llama_sampler_chain_add(self.sampler, sampler)
 
-    def add_softmax(self):
-        sampler = llama_cpp.llama_sampler_init_softmax()
-        llama_cpp.llama_sampler_chain_add(self.sampler, sampler)
-
     def add_top_k(self, k: int):
         sampler = llama_cpp.llama_sampler_init_top_k(k)
         llama_cpp.llama_sampler_chain_add(self.sampler, sampler)
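Migration note for downstream code: per the deprecation notice removed in the second file, upstream llama.cpp dropped `llama_sampler_init_softmax` because the dist sampler normalizes the candidate distribution itself, so an explicit softmax stage in the chain is redundant. A minimal before/after sketch, assuming a hypothetical `chain` object exposing the `add_*` methods shown above:

    # Hypothetical caller code built on the wrapper class in this diff.
    # `chain` is assumed to be an instance of that sampler-chain class.

    # Before (no longer available -- add_softmax was removed):
    #   chain.add_top_k(40)
    #   chain.add_softmax()
    #   chain.add_dist(1234)

    # After: drop the explicit softmax step; the dist sampler
    # normalizes the surviving candidates before drawing a token.
    chain.add_top_k(40)   # prune to the 40 highest-logit tokens first
    chain.add_dist(1234)  # sample from the normalized distribution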
Second changed file (the low-level ctypes bindings):

@@ -3551,15 +3551,6 @@ def llama_sampler_init_dist(seed: int) -> llama_sampler_p:
     ...
 
 
-# /// @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
-# /// NOTE: Avoid using on the full vocabulary as the sorting can become slow. For example, apply top-k or top-p sampling first.
-# DEPRECATED(LLAMA_API struct llama_sampler * llama_sampler_init_softmax (void),
-#            "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)");
-@ctypes_function("llama_sampler_init_softmax", [], llama_sampler_p_ctypes)
-def llama_sampler_init_softmax() -> llama_sampler_p:
-    ...
-
-
 # /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
 # /// Setting k <= 0 makes this a noop
 # LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);
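If downstream code called the removed binding only to turn logits into probabilities (rather than to sample), that normalization is easy to reproduce in user code without any binding. A minimal, numerically stable sketch in plain numpy, independent of these bindings:

    import numpy as np

    def softmax(logits: np.ndarray) -> np.ndarray:
        """Numerically stable softmax: subtract the max logit before
        exponentiating so large logits cannot overflow."""
        shifted = logits - np.max(logits)
        exp = np.exp(shifted)
        return exp / exp.sum()

    # Example: probabilities for three candidate logits.
    print(softmax(np.array([2.0, 1.0, 0.1])))  # -> [0.659 0.242 0.099]

As the deleted upstream comment warned, sorting/normalizing the full vocabulary is slow; apply a pruning sampler such as top-k first and normalize only the survivors.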