From cdb6fb5293df7c3350ac61cff7189699a5a260ab Mon Sep 17 00:00:00 2001
From: Patrick Peng
Date: Thu, 19 Jun 2025 21:16:44 +0800
Subject: [PATCH 1/3] vocab : prevent stack overflow in tokenize

---
 src/llama-vocab.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index dd2251ef3cbef..3398faeadd468 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -3060,6 +3060,10 @@ int32_t llama_vocab::tokenize(
                     bool   add_special,
                     bool   parse_special) const {
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
+    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+        GGML_ABORT("tokenization result size %zu exceeds int32_t limit", res.size());
+    }
+
     if (n_tokens_max < (int) res.size()) {
         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
         return -((int) res.size());

From f5972a1f6fa317a843711f7a98d3c20b1b10d50b Mon Sep 17 00:00:00 2001
From: Patrick Peng
Date: Thu, 19 Jun 2025 22:03:50 +0800
Subject: [PATCH 2/3] vocab : return error instead of aborting on oversized
 token count

---
 src/llama-vocab.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 3398faeadd468..41f0f888ddedb 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -3061,7 +3061,8 @@ int32_t llama_vocab::tokenize(
                     bool   parse_special) const {
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
     if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-        GGML_ABORT("tokenization result size %zu exceeds int32_t limit", res.size());
+        LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
+        return -1;
     }
 
     if (n_tokens_max < (int) res.size()) {

From d087f749505554e72113b225fe796233b06f7e2a Mon Sep 17 00:00:00 2001
From: Patrick Peng
Date: Fri, 20 Jun 2025 01:22:46 +0800
Subject: [PATCH 3/3] vocab : INT32_MIN from llama_tokenize on overflow

---
 common/common.cpp   | 3 +++
 include/llama.h     | 1 +
 src/llama-vocab.cpp | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/common/common.cpp b/common/common.cpp
index eb80cee0894a6..eb71c7cb28f8f 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1284,6 +1284,9 @@ std::vector<llama_token> common_tokenize(
     int n_tokens = text.length() + 2 * add_special;
     std::vector<llama_token> result(n_tokens);
     n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+    if (n_tokens == std::numeric_limits<int32_t>::min()) {
+        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
+    }
     if (n_tokens < 0) {
         result.resize(-n_tokens);
         int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
diff --git a/include/llama.h b/include/llama.h
index 635508b10f2ff..90918cad4626f 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -1087,6 +1087,7 @@ extern "C" {
     /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
     /// @return Returns the number of tokens on success, no more than n_tokens_max
     /// @return Returns a negative number on failure - the number of tokens that would have been returned
+    /// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
     /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
     /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
     ///                      as plaintext. Does not insert a leading space.
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 41f0f888ddedb..695e5762fff7a 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -3062,7 +3062,7 @@ int32_t llama_vocab::tokenize(
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
     if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
         LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
-        return -1;
+        return std::numeric_limits<int32_t>::min();
     }
 
     if (n_tokens_max < (int) res.size()) {