diff --git a/common/chat.cpp b/common/chat.cpp
index 955c42852a9be..0150a1f0fa61b 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1759,6 +1759,13 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context);
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
+ auto supports_thinking = tmpl.source().find("") != std::string::npos;
+
+ // you should not be able to call enable_thinking if is not supported
+ if (!supports_thinking && extra_context["enable_thinking"]) {
+ extra_context["enable_thinking"] = false;
+ }
+
if (string_ends_with(data.prompt, "\n")) {
if (!extra_context["enable_thinking"]) {
data.prompt += "";
@@ -1821,9 +1828,31 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
tool_call_alts.push_back(
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
- builder.add_rule("root",
- std::string(data.thinking_forced_open ? "( \"\" space )? " : "") +
- (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+
+ builder.add_rule("thinking-start", "\"\"");
+ builder.add_rule("thinking-content", "( [^<] | \"<\" [^/] | \"\" [^t] | \"] )*");
+ builder.add_rule("thinking-end", "\"\" space");
+
+ //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed
+ std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted
+ if (extra_context["enable_thinking"]) {
+ data.grammar_triggers.push_back({
+ COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
+ data.thinking_forced_open ? "" : ""
+ });
+ if (data.thinking_forced_open) {
+ //thinking tag was already opened by used so we don't need to add it again
+ thinking_grammar_logic = "(thinking-content thinking-end) ";
+ }
+ else
+ {
+ thinking_grammar_logic = "(thinking-start thinking-content thinking-end) ";
+ }
+ }
+
+
+ builder.add_rule("root", thinking_grammar_logic + (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,