From c6c4f7c41f2ebb8db91db3193244be7cecb1515e Mon Sep 17 00:00:00 2001
From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
Date: Mon, 11 Aug 2025 18:21:59 +0200
Subject: [PATCH 01/12] Update chat.cpp to support (at least) qwen3 +
tool_choice = required
---
common/chat.cpp | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index b1a1218ca2c97..7ac3686e12e9e 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1586,8 +1586,9 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
}
if (!inputs.tools.is_null()) {
+ auto supports_thinking = tmpl.source().find("") != std::string::npos && data.thinking_forced_open == false;
// (content)?({"name": "foo", "arguments": {"a": 1}})*
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED || supports_thinking;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
std::vector tool_rules;
std::vector tool_call_alts;
@@ -1639,9 +1640,19 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
tool_call_alts.push_back(
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
- builder.add_rule("root",
+ if (supports_thinking)
+ {
+ builder.add_rule("thinking", "\"\" [^\\x00]* \"\" space");
+ builder.add_rule("root",
+ "(thinking)? space " +
+ (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+ }
+ else
+ {
+ builder.add_rule("root",
std::string(data.thinking_forced_open ? "( \"\" space )? " : "") +
(inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+ }
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
From 42937a53226167e0f0d5f112d01685a9e1f44caf Mon Sep 17 00:00:00 2001
From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
Date: Mon, 11 Aug 2025 23:21:48 +0200
Subject: [PATCH 02/12] refactored changes to follow string tern op
---
common/chat.cpp | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 7ac3686e12e9e..932231facd572 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1640,19 +1640,13 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
tool_call_alts.push_back(
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
- if (supports_thinking)
- {
+ if (supports_thinking) {
builder.add_rule("thinking", "\"\" [^\\x00]* \"\" space");
- builder.add_rule("root",
- "(thinking)? space " +
- (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
}
- else
- {
- builder.add_rule("root",
- std::string(data.thinking_forced_open ? "( \"\" space )? " : "") +
+ builder.add_rule("root",
+ std::string(supports_thinking ? "(thinking)? space " :
+ data.thinking_forced_open ? "( \"\" space )? " : "") +
(inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
- }
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
From 5796938c03bb35144f7f0cde1474f0295a3bb14b Mon Sep 17 00:00:00 2001
From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
Date: Tue, 12 Aug 2025 06:42:04 +0200
Subject: [PATCH 03/12] fixing editorconfig-checker CI (tailing whitespace)
---
common/chat.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 932231facd572..429a636685e58 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1644,7 +1644,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
builder.add_rule("thinking", "\"\" [^\\x00]* \"\" space");
}
builder.add_rule("root",
- std::string(supports_thinking ? "(thinking)? space " :
+ std::string(supports_thinking ? "(thinking)? space " :
data.thinking_forced_open ? "( \"\" space )? " : "") +
(inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
From 79e4a7bdbcf5212dc50bf48c40084394f2225194 Mon Sep 17 00:00:00 2001
From: Pierre F
Date: Mon, 25 Aug 2025 18:53:46 +0200
Subject: [PATCH 04/12] hermes 2 pro tool calling, better support for thinking
(thinking tag already opened grammar)
---
common/chat.cpp | 32 ++++++++++++++++++++++++--------
1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index fa0473643762c..c230eecaa42da 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1767,9 +1767,11 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
}
if (!inputs.tools.is_null()) {
- auto supports_thinking = tmpl.source().find("") != std::string::npos && data.thinking_forced_open == false;
+ auto supports_thinking = tmpl.source().find("") != std::string::npos;
+ // you should not be able to call enable_thinking if is not supported
+ GGML_ASSERT(!extra_context["enable_thinking"] || extra_context["enable_thinking"] == supports_thinking);
// (content)?({"name": "foo", "arguments": {"a": 1}})*
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED || supports_thinking;
+ data.grammar_lazy = true;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
std::vector tool_rules;
std::vector tool_call_alts;
@@ -1821,13 +1823,27 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
tool_call_alts.push_back(
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
- if (supports_thinking) {
- builder.add_rule("thinking", "\"\" [^\\x00]* \"\" space");
+
+ builder.add_rule("thinking_start", "\"\"");
+ builder.add_rule("thinking_content", "[^\\x00]*");
+ builder.add_rule("thinking_end", "\"\" space");
+
+ //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed
+ std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted
+ if (extra_context["enable_thinking"]) {
+ if (data.thinking_forced_open) {
+ //thinking tag was already opened by used so we don't need to add it again
+ thinking_grammar_logic = "thinking_content thinking_end ";
+ }
+ else
+ {
+ thinking_grammar_logic = "thinking_start thinking_content thinking_end ";
+ }
}
- builder.add_rule("root",
- std::string(supports_thinking ? "(thinking)? space " :
- data.thinking_forced_open ? "( \"\" space )? " : "") +
- (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+
+
+ builder.add_rule("root", thinking_grammar_logic + (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
From dbae9214b9cd15e1097168eda1be3f1115dd3f40 Mon Sep 17 00:00:00 2001
From: Pierre F
Date: Mon, 25 Aug 2025 19:45:19 +0200
Subject: [PATCH 05/12] qwen hermes tool calling : fixed grammar rules names
---
common/chat.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index c230eecaa42da..67db927a5c8d3 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1824,20 +1824,20 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
- builder.add_rule("thinking_start", "\"\"");
- builder.add_rule("thinking_content", "[^\\x00]*");
- builder.add_rule("thinking_end", "\"\" space");
+ builder.add_rule("thinking-start", "\"\"");
+ builder.add_rule("thinking-content", "[^\\x00]*");
+ builder.add_rule("thinking-end", "\"\" space");
//thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed
std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted
if (extra_context["enable_thinking"]) {
if (data.thinking_forced_open) {
//thinking tag was already opened by used so we don't need to add it again
- thinking_grammar_logic = "thinking_content thinking_end ";
+ thinking_grammar_logic = "thinking-content thinking-end ";
}
else
{
- thinking_grammar_logic = "thinking_start thinking_content thinking_end ";
+ thinking_grammar_logic = "thinking-start thinking-content thinking-end ";
}
}
From 86493dd92b7d7ab2e2312df08b937ecbf0470ef7 Mon Sep 17 00:00:00 2001
From: Pierre F
Date: Mon, 25 Aug 2025 20:13:45 +0200
Subject: [PATCH 06/12] fixed really weird grammar crash `Unexpected empty
grammar stack after accepting piece:`
---
common/chat.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 67db927a5c8d3..52743a1d321a4 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1837,7 +1837,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
}
else
{
- thinking_grammar_logic = "thinking-start thinking-content thinking-end ";
+ thinking_grammar_logic = "(thinking-start thinking-content thinking-end)? ";
}
}
From bb5e352ad91cdb868df831513442607193d6e6a6 Mon Sep 17 00:00:00 2001
From: Pierre F
Date: Mon, 25 Aug 2025 20:26:38 +0200
Subject: [PATCH 07/12] also apply the hotcrashfix here, just in case
---
common/chat.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 52743a1d321a4..23a446da91655 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1833,7 +1833,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
if (extra_context["enable_thinking"]) {
if (data.thinking_forced_open) {
//thinking tag was already opened by used so we don't need to add it again
- thinking_grammar_logic = "thinking-content thinking-end ";
+ thinking_grammar_logic = "(thinking-content thinking-end)? ";
}
else
{
From 6d5f561896d5d75647503ddbbd1e9d2bae3cab24 Mon Sep 17 00:00:00 2001
From: Pierre F
Date: Tue, 26 Aug 2025 12:30:57 +0200
Subject: [PATCH 08/12] reverted changes done to grammar_lazy for hermes 2
---
common/chat.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 23a446da91655..5f7519281431c 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1771,7 +1771,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
// you should not be able to call enable_thinking if is not supported
GGML_ASSERT(!extra_context["enable_thinking"] || extra_context["enable_thinking"] == supports_thinking);
// (content)?({"name": "foo", "arguments": {"a": 1}})*
- data.grammar_lazy = true;
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
std::vector tool_rules;
std::vector tool_call_alts;
From 352274e116de585e5fd657a15ae06c31694655de Mon Sep 17 00:00:00 2001
From: Pierre F
Date: Tue, 26 Aug 2025 12:39:50 +0200
Subject: [PATCH 09/12] if there is enable_thinking enabled but hermes model
doesn't support it, just disable it, don't GGML_ABORT
---
common/chat.cpp | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 5f7519281431c..6e087625a5980 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1758,6 +1758,13 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context);
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
+ auto supports_thinking = tmpl.source().find("") != std::string::npos;
+
+ // you should not be able to call enable_thinking if is not supported
+ if (!supports_thinking && extra_context["enable_thinking"]) {
+ extra_context["enable_thinking"] = false;
+ }
+
if (string_ends_with(data.prompt, "\n")) {
if (!extra_context["enable_thinking"]) {
data.prompt += "";
@@ -1767,9 +1774,6 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
}
if (!inputs.tools.is_null()) {
- auto supports_thinking = tmpl.source().find("") != std::string::npos;
- // you should not be able to call enable_thinking if is not supported
- GGML_ASSERT(!extra_context["enable_thinking"] || extra_context["enable_thinking"] == supports_thinking);
// (content)?({"name": "foo", "arguments": {"a": 1}})*
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
From 0e558302a2daf5ecd671c14e84a52be1b89490e7 Mon Sep 17 00:00:00 2001
From: CNE FICHEPOIL Pierre
Date: Tue, 26 Aug 2025 14:48:26 +0200
Subject: [PATCH 10/12] fix thinking-content eating closing think tag | ref
#8953
---
common/chat.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 6e087625a5980..154b2a352d286 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1829,7 +1829,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
builder.add_rule("thinking-start", "\"\"");
- builder.add_rule("thinking-content", "[^\\x00]*");
+ builder.add_rule("thinking-content", "( [^<] | \"<\" [^/] | \"\" [^t] | \"] )*");
builder.add_rule("thinking-end", "\"\" space");
//thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed
From e62cd709e793a7acfbc5f9508960db6f0929a21f Mon Sep 17 00:00:00 2001
From: CNE FICHEPOIL Pierre
Date: Tue, 26 Aug 2025 15:01:36 +0200
Subject: [PATCH 11/12] removed `?` from grammar as it doesn't crash on linux,
probably worth it's own issue
---
common/chat.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 154b2a352d286..ff4ca39ad7e78 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1837,11 +1837,11 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
if (extra_context["enable_thinking"]) {
if (data.thinking_forced_open) {
//thinking tag was already opened by used so we don't need to add it again
- thinking_grammar_logic = "(thinking-content thinking-end)? ";
+ thinking_grammar_logic = "(thinking-content thinking-end) ";
}
else
{
- thinking_grammar_logic = "(thinking-start thinking-content thinking-end)? ";
+ thinking_grammar_logic = "(thinking-start thinking-content thinking-end) ";
}
}
From 310701ba1bebc21384383558e0e410be5dbc7736 Mon Sep 17 00:00:00 2001
From: Pierre F
Date: Thu, 28 Aug 2025 22:23:18 +0200
Subject: [PATCH 12/12] fixed crash with "auto" mode, trigger was missing
---
common/chat.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/common/chat.cpp b/common/chat.cpp
index ff4ca39ad7e78..89fbf479b8fa5 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1835,6 +1835,10 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
//thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed
std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted
if (extra_context["enable_thinking"]) {
+ data.grammar_triggers.push_back({
+ COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
+ data.thinking_forced_open ? "" : ""
+ });
if (data.thinking_forced_open) {
//thinking tag was already opened by used so we don't need to add it again
thinking_grammar_logic = "(thinking-content thinking-end) ";