175 changes: 175 additions & 0 deletions common/chat.cpp
@@ -643,6 +643,7 @@ const char * common_chat_format_name(common_chat_format format) {
        case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
        case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
        case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax M2";
        default:
            throw std::runtime_error("Unknown chat format");
    }
@@ -790,6 +791,22 @@ static void foreach_function(const json & tools, const std::function<void(const
    }
}

// Collect the property names listed in a JSON Schema "required" array.
static std::set<std::string> get_required_parameters(const json & params) {
    std::set<std::string> retval;
    if (params.is_array()) {
        for (const auto & element : params) {
            if (element.is_string()) {
                retval.emplace(element.get<std::string>());
            }
        }
    }
    return retval;
}

// Wrap a grammar rule so that it becomes optional: rule -> ( rule )?
static std::string gr_optional(const std::string & rule) {
    return "( " + rule + " )?";
}

static std::string apply(
    const common_chat_template & tmpl,
    const struct templates_params & inputs,
@@ -2791,6 +2808,156 @@ static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
    }
}

static common_chat_params common_chat_params_init_minimax_m2(
        const common_chat_template & tmpl,
        templates_params & params,
        const common_chat_templates_inputs & inputs)
{
    common_chat_params data;
    data.prompt = apply(tmpl, params);
    data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
    if (string_ends_with(data.prompt, "<think>\n")) { // MiniMax adds a newline at the start of reasoning content
        if (!inputs.enable_thinking) {
            data.prompt += "</think>";
        } else {
            data.thinking_forced_open = true;
        }
    }

    if (params.tools.is_array() && !params.tools.empty()) {
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            std::vector<std::string> tool_rules;
            foreach_function(params.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                std::string name = function.at("name");
                auto parameters = function.at("parameters");
                builder.resolve_refs(parameters);

                // Create the rule for the MiniMax M2 XML-style function call format
                std::string param_rules;
                if (parameters.contains("properties")) {
                    std::set<std::string> requiredParameters;
                    if (parameters.contains("required")) {
                        requiredParameters = get_required_parameters(parameters.at("required"));
                    }
                    for (const auto & [key, value] : parameters.at("properties").items()) {
                        bool required = requiredParameters.count(key) > 0;
                        std::string specific_param_rules = "\"<parameter name=\\\"" + key + "\\\">\" " + builder.add_schema(name + "-arg-" + key, value) + " \"</parameter>\" space ";
                        param_rules += required ? specific_param_rules : gr_optional(specific_param_rules);
                    }
                }
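                // For illustration: a tool `get_weather` with one required string
                // parameter `city` yields a rule shaped roughly like
                //   get_weather-call ::= "<minimax:tool_call>" space "<invoke name=\"get_weather\">" space
                //                        "<parameter name=\"city\">" get_weather-arg-city "</parameter>" space
                //                        "</invoke>" space "</minimax:tool_call>"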
                tool_rules.push_back(builder.add_rule(name + "-call",
                    "\"<minimax:tool_call>\" space \"<invoke name=\\\"" + name + "\\\">\" space " +
                    param_rules + " \"</invoke>\" space \"</minimax:tool_call>\""));
            });

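            // With grammar_lazy, constrained decoding only engages once the
            // trigger word below has been generated by the model.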
            data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<minimax:tool_call>" });

            data.preserved_tokens = {
                "<minimax:tool_call>", "</minimax:tool_call>", "<think>", "</think>",
                "<function", "</function>", "<parameter", "</parameter>",
            };

            builder.add_rule("root", string_join(tool_rules, " | "));
        });
    }
    return data;
}

static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
    // Parse thinking tags first - this handles the main reasoning content.
    // The chat template doesn't seem to handle interleaved thinking, so we don't worry about it either.
aldehir (Collaborator) commented:

Are we sure we're using the correct definition of interleaved thinking here? I don't think it means the CoT is interleaved with the content during generation, but rather that it is interleaved throughout the prompt during multi-turn tool-calling sessions. It seems to behave very similarly to gpt-oss. None of my testing, granted at Q2_XL, indicates that the CoT is interleaved during generation. It's also only applied if the last message is a tool response.

Using the proposed fix for tool response support by @ochafik, it works as-is if I pass reasoning_content with the assistant messages. Without this fix, the tool messages are transformed to user by the polyfill.

Template Example
curl -X POST http://localhost:8080/apply-template \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [
      {
        "role": "system",
        "content": "You are a weather man"
      },
      {
        "role": "user",
        "content": "Can you compare the weather at New York and Los Angeles?"
      },
      {
        "role": "assistant",
        "reasoning_content": "I need to get the weather of New York and Los Angeles, let me do New York first.",
        "tool_calls": [
          {
            "id": "1",
            "type": "function",
            "function": {
              "name": "get_weather",
              "arguments": "{\"city\": \"New York\"}"
            }
          }
        ]
      },
      {
        "role": "tool",
        "tool_call_id": "1",
        "content": "50 F"
      }
    ],
    "tools": [
      {
        "type": "function",
        "function": {
          "name": "get_weather",
          "description": "Get the current weather for a specified city",
          "parameters": {
            "type": "object",
            "properties": {
              "city": {
                "type": "string",
                "description": "The city name, e.g. San Francisco"
              }
            },
            "required": ["city"]
          }
        }
      }
    ]
  }'
]~b]system
You are a weather man

# Tools
You may call one or more tools to assist with the user query.
Here are the tools available in JSONSchema format:

<tools>
<tool>{"name": "get_weather", "description": "Get the current weather for a specified city", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "The city name, e.g. San Francisco"}}, "required": ["city"]}}</tool>
</tools>

When making tool calls, use XML format to invoke tools and pass parameters:

<minimax:tool_call>
<invoke name="tool-name-1">
<parameter name="param-key-1">param-value-1</parameter>
<parameter name="param-key-2">param-value-2</parameter>
...
</invoke>
</minimax:tool_call>[e~[
]~b]user
Can you compare the weather at New York and Los Angeles?[e~[
]~b]ai
<think>
I need to get the weather of New York and Los Angeles, let me do New York first.
</think>


<minimax:tool_call>
<invoke name="get_weather">
<parameter name="city">New York</parameter>
</invoke>
</minimax:tool_call>[e~[
]~b]tool
<response>50 F</response>[e~[
]~b]ai
<think>


It does place the burden of returning reasoning_content on the clients.

pwilkin (Collaborator, PR author) replied:

@aldehir That's actually a good clarification - I was somehow convinced that interleaved reasoning meant content blocks with multiple reasoning / content chunks intertwined (I think the Anthropic protocol allows something like that). We shouldn't have a problem with it if it's just tool calls intertwined with reasoning blocks.

pwilkin (Collaborator, PR author) commented:

@hksdpc255 please take a look at this discussion, since I feel you're repeating the same error (using reasoning-format none and literally outputting the opening <think> tag).

hksdpc255 commented on Nov 5, 2025:

@pwilkin Thanks for pointing that out. I actually had the same misunderstanding about interleaved thinking at first.

Because of that, I initially implemented full support for reasoning and normal content being interleaved during generation. Later I realized that this wasn’t really required in our current setup. But since I already had a custom test harness for it, I verified that my implementation can indeed handle such interleaved reasoning/content streams. It might still be useful in the future if models start emitting that pattern more often.

As for --reasoning-format none, my understanding was that it means not to treat reasoning specially, but to include it directly in the normal assistant message. This interpretation seemed consistent with how some chat templates (like GLM 4.5 / 4.6 and MiniMax M2) automatically detect <think> blocks in the main content, extract them into reasoning_content, and remove them from the visible answer. That behavior is quite helpful for clients that don’t support returning reasoning_content back to the server — which I believe is the case for most code agents.

I’m currently using --reasoning-format none to serve the Zed editor, and in that setup, MiniMax M2 performs impressively well on fairly complex tasks.

However, I might have misunderstood the actual purpose of --reasoning-format none. If so, I’d really appreciate clarification. And if it’s not meant for this kind of use case, I think introducing a new --reasoning-format mode to explicitly support it would make a lot of sense.
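
For illustration, a minimal sketch of the kind of <think>-block extraction described above (the split_reasoning helper is hypothetical, not code from this PR or from the templates):

#include <string>
#include <utility>

// Split a leading <think>...</think> block out of an assistant message:
// the block body becomes reasoning_content, the remainder stays visible.
static std::pair<std::string, std::string> split_reasoning(const std::string & content) {
    const std::string open  = "<think>";
    const std::string close = "</think>";
    const auto begin = content.find(open);
    const auto end   = content.find(close);
    if (begin == std::string::npos || end == std::string::npos || end < begin) {
        return { "", content }; // no reasoning block: everything stays visible
    }
    std::string reasoning = content.substr(begin + open.size(), end - begin - open.size());
    std::string visible   = content.substr(end + close.size());
    return { reasoning, visible };
}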

    builder.try_parse_reasoning("<think>", "</think>");

    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    static const std::string tool_call_tag = "minimax:tool_call";
    static const std::string function_tag  = "invoke";
    static const std::string parameter_tag = "parameter";

    // Parse tool calls - similar to Seed OSS (pseudo-XML), but different syntax
    static const common_regex tool_call_begin_regex("<" + tool_call_tag + ">");
    static const common_regex tool_call_end_regex("</" + tool_call_tag + ">");
    static const common_regex function_regex("<" + function_tag + " name=\"([^\"]+)\">");
    static const common_regex param_regex("<" + parameter_tag + " name=\"([^\"]+)\">");

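    // Expected tool-call shape (whitespace-tolerant), e.g.:
    //   <minimax:tool_call>
    //   <invoke name="get_weather">
    //   <parameter name="city">New York</parameter>
    //   </invoke>
    //   </minimax:tool_call>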
    while (auto tool_res = builder.try_find_regex(tool_call_begin_regex)) {
        builder.consume_spaces(); // Consume whitespace after <minimax:tool_call>

        // Look for a function call inside the tool call, ignore any content before it
        if (auto func_res = builder.try_find_regex(function_regex, std::string::npos, false)) {
            auto function_name = builder.str(func_res->groups[1]);

            // Parse XML parameters: <parameter name="name">value</parameter>
            json args = json::object();
            // Parse all parameters
            while (auto param_res = builder.try_find_regex(param_regex, std::string::npos, false)) {
                // Again, ignore noise around parameters
                auto param_name = builder.str(param_res->groups[1]);
                builder.move_to(param_res->groups[0].end);
                builder.consume_spaces(); // Consume whitespace after the parameter tag
                auto savedPos = builder.pos();
                if (auto param_parse = builder.try_find_literal("</" + parameter_tag + ">")) {
                    auto param = param_parse->prelude;
                    builder.move_to(savedPos);
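                    // Prefer structured values: if the parameter body parses as
                    // JSON, keep it typed; otherwise fall back to the raw string.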
                    try {
                        if (auto param_res = builder.try_consume_json()) {
                            args[param_name] = param_res->json;
                        } else {
                            args[param_name] = param;
                        }
                    } catch (json::exception &) {
                        args[param_name] = param;
                    }
                } else {
                    throw common_chat_msg_partial_exception("Incomplete tool parameter");
                }
            }
            // Look for the closing function tag
            auto end_func = builder.try_find_literal("</" + function_tag + ">");
            if (end_func) {
                builder.move_to(end_func->groups[0].end);
                builder.consume_spaces(); // Consume whitespace after </invoke>

                // Add the tool call with parsed arguments, but only if we REALLY got the closing literal
                auto eaten_fragment = builder.input().substr(end_func->groups[0].begin, end_func->groups[0].end - end_func->groups[0].begin);
                auto funlen = std::string("</" + function_tag + ">").length();
                if (eaten_fragment.length() >= funlen && eaten_fragment.substr(0, funlen) == std::string("</" + function_tag + ">")) {
                    if (!builder.add_tool_call(function_name, "", args.dump())) {
                        throw common_chat_msg_partial_exception("Incomplete tool call");
                    }
                } else {
                    throw common_chat_msg_partial_exception("Incomplete tool call");
                }
            } else {
                throw common_chat_msg_partial_exception("Incomplete tool call");
            }
            // Look for the closing tool call tag
            if (auto end_tool = builder.try_find_regex(tool_call_end_regex, std::string::npos, false)) {
                builder.move_to(end_tool->groups[0].end);
                builder.consume_spaces(); // Consume trailing whitespace after the tool call
            } else {
                throw common_chat_msg_partial_exception("Incomplete tool call");
            }
        } else {
            // No function found - don't consume content here, let it be handled at the end
            break;
        }
    }

    // Consume any remaining whitespace after all tool-call processing
    builder.consume_spaces();
    auto remaining = builder.consume_rest();
    // If there's any non-whitespace content remaining, add it as content
    if (!string_strip(remaining).empty()) {
        builder.add_content(remaining);
    }
}

static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    data.prompt = apply(tmpl, inputs);
@@ -2942,6 +3109,11 @@ static common_chat_params common_chat_templates_apply_jinja(
        return common_chat_params_init_seed_oss(tmpl, params, inputs);
    }

    // MiniMax M2
    if (src.find("<minimax:tool_call>") != std::string::npos) {
        return common_chat_params_init_minimax_m2(tmpl, params, inputs);
    }

    // Nemotron v2
    if (src.find("<SPECIAL_10>") != std::string::npos) {
        return common_chat_params_init_nemotron_v2(tmpl, params);
@@ -3139,6 +3311,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
            common_chat_parse_lfm2(builder);
            break;
        case COMMON_CHAT_FORMAT_MINIMAX_M2:
            common_chat_parse_minimax_m2(builder);
            break;
        default:
            throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
    }
1 change: 1 addition & 0 deletions common/chat.h
@@ -117,6 +117,7 @@ enum common_chat_format {
    COMMON_CHAT_FORMAT_NEMOTRON_V2,
    COMMON_CHAT_FORMAT_APERTUS,
    COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
    COMMON_CHAT_FORMAT_MINIMAX_M2,

    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
171 changes: 171 additions & 0 deletions models/templates/unsloth-MiniMax-M2.jinja
@@ -0,0 +1,171 @@
{# Unsloth template fixes #}
{# ---------- special token variables ---------- #}
{%- set toolcall_begin_token = '<minimax:tool_call>' -%}
{%- set toolcall_end_token = '</minimax:tool_call>' -%}
{#- Tool Rendering Functions ============================================== -#}
{%- macro render_tool_namespace(namespace_name, tool_list) -%}
{%- for tool in tool_list -%}
<tool>{{ tool.function | tojson | string }}</tool>
{% endfor -%}
{%- endmacro -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{ content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{#- System Message Construction ============================================ -#}
{%- macro build_system_message(system_message) -%}
{%- if system_message and system_message.content -%}
{{- visible_text(system_message.content) }}
{%- else -%}
{%- if model_identity is not defined -%}
{%- set model_identity = "You are a helpful assistant." -%}
{%- endif -%}
{{- model_identity }}
{%- endif -%}

{#- Handle current_date -#}
{%- if system_message and system_message.current_date -%}
{{- '\n' ~ 'Current date: ' + system_message.current_date }}
{%- endif -%}
{#- Handle current_location -#}
{%- if system_message and system_message.current_location -%}
{{- '\n' ~ 'Current location: ' + system_message.current_location }}
{%- endif -%}
{%- endmacro -%}
{#- Main Template Logic ================================================= -#}
{#- Extract system message (only first message if it's system) -#}
{%- set system_message = none -%}
{%- set conversation_messages = messages -%}
{%- if messages and messages[0].role == "system" -%}
{%- set system_message = messages[0] -%}
{%- set conversation_messages = messages[1:] -%}
{%- endif -%}
{#- Get the index of the last user message, for interleaved thinking -#}
{%- set ns = namespace(last_user_index=-1) %}
{% for m in conversation_messages %}
{%- if m.role == 'user' %}
{% set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{#- Render system message -#}
{{- ']~!b[' ~ ']~b]system' ~ '\n' }}
{{- build_system_message(system_message) }}
{#- Render tools if available -#}
{%- if tools -%}
{{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }}
{{- '\n' ~ '<tools>' ~ '\n' }}
{{- render_tool_namespace("functions", tools) }}
{{- '</tools>' ~ '\n\n' }}
{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }}
{{- '\n' ~ toolcall_begin_token }}
<invoke name="tool-name-1">
<parameter name="param-key-1">param-value-1</parameter>
<parameter name="param-key-2">param-value-2</parameter>
...
</invoke>
{{- '\n' ~ toolcall_end_token }}
{%- endif -%}
{{- '[e~[\n' }}

{#- Render messages -#}
{%- set last_tool_call = namespace(name=none) -%}
{%- for message in conversation_messages -%}
{%- if message.role == 'assistant' -%}
{#- Only render reasoning_content if no user message follows -#}
{{- ']~b]ai' ~ '\n' }}

{%- set reasoning_content = '' %}
{%- set content = visible_text(message.content) %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{# Unsloth template fix - must use a for loop, since llama.cpp errors out otherwise #}
{%- set parts = content.split('</think>') %}
{%- for part in parts %}
{%- if loop.index0 == 0 -%}
{%- set reasoning_content = part.strip('\n') %}
{%- set reasoning_content = (reasoning_content.split('<think>')|last) %}
{%- set reasoning_content = reasoning_content.strip('\n') -%}
{%- else -%}
{%- set content = part.strip('\n') %}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- endif %}
{%- if reasoning_content and loop.index0 > ns.last_user_index -%}
{{- '<think>' ~ '\n' ~ reasoning_content ~ '\n' ~ '</think>' ~ '\n\n' }}
{%- endif -%}
{%- if content -%}
{{- content }}
{%- endif -%}
{%- if message.tool_calls -%}
{{- '\n' ~ toolcall_begin_token ~ '\n' }}

{%- for tool_call in message.tool_calls -%}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<invoke name="' + tool_call.name + '">\n' }}
{%- if tool_call.arguments is defined and tool_call.arguments is mapping -%}
{% set _args = tool_call.arguments %}
{%- for k, v in _args|items %}
{{- '<parameter name="' + k + '">' }}
{{- v | tojson | string if v is not string else v }}
{{- '</parameter>' }}
{% endfor %}{%- endif -%}
{{- '</invoke>' ~ '\n' }}
{%- endfor -%}

{{- toolcall_end_token}}
{%- set last_tool_call.name = message.tool_calls[-1].name -%}
{%- else -%}
{%- set last_tool_call.name = none -%}
{%- endif -%}
{{- '[e~[' ~ '\n' }}

{%- elif message.role == 'tool' -%}
{%- if last_tool_call.name is none -%}
{{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
{%- endif -%}
{%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}
{{- ']~b]tool' }}
{%- endif -%}
{%- if message.content is string -%}
{{- '\n<response>' }}
{{- message.content }}
{{- '</response>' }}
{%- else -%}
{%- for tr in message.content -%}
{{- '\n<response>' }}
{{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}
{{- '\n</response>' }}
{%- endfor -%}
{%- endif -%}
{%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
{{- '[e~[\n' -}}
{%- endif -%}

{%- elif message.role == 'user' -%}
{{- ']~b]user' ~ '\n' }}
{{- visible_text(message.content) }}
{{- '[e~[' ~ '\n' }}
{%- endif -%}
{%- endfor -%}

{#- Generation prompt -#}
{%- if add_generation_prompt -%}
{{- ']~b]ai' ~ '\n' ~ '<think>' ~ '\n' }}
{%- endif -%}
{# Copyright 2025-present Unsloth. Apache 2.0 License. #}