diff --git a/common/chat.cpp b/common/chat.cpp
index 938872e82ee1d..fc31637ef91d7 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -643,6 +643,7 @@ const char * common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
         case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
         case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
+        case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax M2";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -790,6 +791,22 @@ static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
+static std::set<std::string> get_required_parameters(const json & params) {
+    std::set<std::string> retval;
+    if (!params.empty()) {
+        for (const auto & element : params) {
+            if (element.is_string()) {
+                retval.emplace(element.get<std::string>());
+            }
+        }
+    }
+    return retval;
+}
+
+static std::string gr_optional(std::string rule) {
+    return "( " + rule + " )?";
+}
+
 static std::string apply(
     const common_chat_template & tmpl,
     const struct templates_params & inputs,
@@ -2791,6 +2808,156 @@ static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
     }
 }
 
+static common_chat_params common_chat_params_init_minimax_m2(
+    const common_chat_template & tmpl,
+    templates_params & params,
+    const common_chat_templates_inputs & inputs)
+{
+    common_chat_params data;
+    data.prompt = apply(tmpl, params);
+    data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
+    if (string_ends_with(data.prompt, "<think>\n")) { // Minimax adds a new line at the start of reasoning content
+        if (!inputs.enable_thinking) {
+            data.prompt += "</think>";
+        } else {
+            data.thinking_forced_open = true;
+        }
+    }
+
+    if (params.tools.is_array() && !params.tools.empty()) {
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            std::vector<std::string> tool_rules;
+            foreach_function(params.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
+                builder.resolve_refs(parameters);
+
+                // Create rule for the MiniMax M2 function call format
+                std::string param_rules;
+                if (parameters.contains("properties")) {
+                    std::set<std::string> requiredParameters;
+                    if (parameters.contains("required")) {
+                        requiredParameters = get_required_parameters(parameters.at("required"));
+                    }
+                    for (const auto & [key, value] : parameters.at("properties").items()) {
+                        bool required = requiredParameters.count(key) > 0;
+                        std::string specific_param_rules = "\"<parameter name=\\\"" + key + "\\\">\" " + builder.add_schema(name + "-arg-" + key, value) + " \"</parameter>\" space ";
+                        param_rules += required ? specific_param_rules : gr_optional(specific_param_rules);
+                    }
+                }
+                tool_rules.push_back(builder.add_rule(name + "-call",
+                    "\"<minimax:tool_call>\" space \"<invoke name=\\\"" + name + "\\\">\" space " +
+                    param_rules + " \"</invoke>\" space \"</minimax:tool_call>\""));
+            });
+
+            data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<minimax:tool_call>" });
+
+            data.preserved_tokens = {
+                "<think>", "</think>", "<minimax:tool_call>", "</minimax:tool_call>",
+                "<invoke>", "</invoke>",
+            };
+
+            builder.add_rule("root", string_join(tool_rules, " | "));
+        });
+    }
+    return data;
+}
+
+static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
+    // Parse thinking tags first - this handles the main reasoning content
+    // Chat template doesn't seem to handle interleaved thinking, so we don't worry about it either
+    builder.try_parse_reasoning("<think>", "</think>");
+
+    if (!builder.syntax().parse_tool_calls) {
+        builder.add_content(builder.consume_rest());
+        return;
+    }
+
+    static const std::string tool_call_tag = "minimax:tool_call";
+    static const std::string function_tag  = "invoke";
+    static const std::string parameter_tag = "parameter";
+
+    // Parse tool calls - similar to Seed OSS (pseudo-XML), but different syntax
+    static const common_regex tool_call_begin_regex("<" + tool_call_tag + ">");
+    static const common_regex tool_call_end_regex("</" + tool_call_tag + ">");
+    static const common_regex function_regex("<" + function_tag + " name=\"([^\"]+)\">");
+    static const common_regex param_regex("<" + parameter_tag + " name=\"([^\"]+)\">");
+
+    while (auto tool_res = builder.try_find_regex(tool_call_begin_regex)) {
+        builder.consume_spaces(); // Consume whitespace after <minimax:tool_call>
+
+        // Look for function call inside tool call, ignore any content before it
+        if (auto func_res = builder.try_find_regex(function_regex, std::string::npos, false)) {
+            auto function_name = builder.str(func_res->groups[1]);
+
+            // Parse XML parameter values
+            json args = json::object();
+            // Parse all parameters
+            while (auto param_res = builder.try_find_regex(param_regex, std::string::npos, false)) {
+                // again, ignore noise around parameters
+                auto param_name = builder.str(param_res->groups[1]);
+                builder.move_to(param_res->groups[0].end);
+                builder.consume_spaces(); // Consume whitespace after parameter
+                auto savedPos = builder.pos();
+                if (auto param_parse = builder.try_find_literal("</" + parameter_tag + ">")) {
+                    auto param = param_parse->prelude;
+                    builder.move_to(savedPos);
+                    try {
+                        if (auto param_res = builder.try_consume_json()) {
+                            args[param_name] = param_res->json;
+                        } else {
+                            args[param_name] = param;
+                        }
+                    } catch (json::exception &) {
+                        args[param_name] = param;
+                    }
+                } else {
+                    throw common_chat_msg_partial_exception("Incomplete tool parameter");
+                }
+            }
+            // Look for closing function tag
+            auto end_func = builder.try_find_literal("</" + function_tag + ">");
+            if (end_func) {
+                builder.move_to(end_func->groups[0].end);
+                builder.consume_spaces(); // Consume whitespace after </invoke>
+
+                // Add the tool call with parsed arguments, but only if we REALLY got the literal
+                auto eaten_fragment = builder.input().substr(end_func->groups[0].begin, end_func->groups[0].end);
+                auto funlen = std::string("</invoke>").length();
+                if (eaten_fragment.length() >= funlen && eaten_fragment.substr(0, funlen) == std::string("</invoke>")) {
+                    if (!builder.add_tool_call(function_name, "", args.dump())) {
+                        throw common_chat_msg_partial_exception("Incomplete tool call");
+                    }
+                } else {
+                    throw common_chat_msg_partial_exception("Incomplete tool call");
+                }
+            } else {
+                throw common_chat_msg_partial_exception("Incomplete tool call");
+            }
+            // Look for closing tool call tag
+            if (auto end_tool = builder.try_find_regex(tool_call_end_regex, std::string::npos, false)) {
+                builder.move_to(end_tool->groups[0].end);
+                builder.consume_spaces(); // Consume trailing whitespace after tool call
+            } else {
+                throw common_chat_msg_partial_exception("Incomplete tool call");
+            }
+        } else {
+            // No function found - don't consume content here, let it be handled at the end
+            break;
+        }
+    }
+
+    // Consume any remaining whitespace after all tool call processing
+    builder.consume_spaces();
+    auto remaining = builder.consume_rest();
+    // If there's any non-whitespace content remaining, add it as content
+    if (!string_strip(remaining).empty()) {
+        builder.add_content(remaining);
+    }
+}
+
 static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
     data.prompt = apply(tmpl, inputs);
@@ -2942,6 +3109,11 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_seed_oss(tmpl, params, inputs);
     }
 
+    // MiniMax M2
+    if (src.find("<minimax:tool_call>") != std::string::npos) {
+        return common_chat_params_init_minimax_m2(tmpl, params, inputs);
+    }
+
     // Nemotron v2
     if (src.find("<SPECIAL_10>") != std::string::npos) {
         return common_chat_params_init_nemotron_v2(tmpl, params);
@@ -3139,6 +3311,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
         case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
             common_chat_parse_lfm2(builder);
             break;
+        case COMMON_CHAT_FORMAT_MINIMAX_M2:
+            common_chat_parse_minimax_m2(builder);
+            break;
         default:
             throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
     }
diff --git a/common/chat.h b/common/chat.h
index 50efb0d4e516f..7c6dd07080e49 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -117,6 +117,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_NEMOTRON_V2,
     COMMON_CHAT_FORMAT_APERTUS,
     COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
+    COMMON_CHAT_FORMAT_MINIMAX_M2,
 
     COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
 };
diff --git a/models/templates/unsloth-MiniMax-M2.jinja b/models/templates/unsloth-MiniMax-M2.jinja
new file mode 100644
index 0000000000000..44da1cae9ee10
--- /dev/null
+++ b/models/templates/unsloth-MiniMax-M2.jinja
@@ -0,0 +1,171 @@
+{# Unsloth template fixes #}
+{# ----------‑‑‑ special token variables ‑‑‑---------- #}
+{%- set toolcall_begin_token = '<minimax:tool_call>' -%}
+{%- set toolcall_end_token = '</minimax:tool_call>' -%}
+{#- Tool Rendering Functions ============================================== -#}
+{%- macro render_tool_namespace(namespace_name, tool_list) -%}
+{%- for tool in tool_list -%}
+{{ tool.function | tojson | string }}
+{% endfor -%}
+{%- endmacro -%}
+{%- macro visible_text(content) -%}
+    {%- if content is string -%}
+        {{ content }}
+    {%- elif content is iterable and content is not mapping -%}
+        {%- for item in content -%}
+            {%- if item is mapping and item.type == 'text' -%}
+                {{- item.text }}
+            {%- elif item is string -%}
+                {{- item }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{- content }}
+    {%- endif -%}
+{%- endmacro -%}
+{#- System Message Construction ============================================ -#}
+{%- macro build_system_message(system_message) -%}
+    {%- if system_message and system_message.content -%}
+        {{- visible_text(system_message.content) }}
+    {%- else -%}
+        {%- if model_identity is not defined -%}
+            {%- set model_identity = "You are a helpful assistant." -%}
+        {%- endif -%}
+        {{- model_identity }}
+    {%- endif -%}
+
+    {#- Handle current_date -#}
+    {%- if system_message and system_message.current_date -%}
+        {{- '\n' ~ 'Current date: ' + system_message.current_date }}
+    {%- endif -%}
+    {#- Handle current_location -#}
+    {%- if system_message and system_message.current_location -%}
+        {{- '\n' ~ 'Current location: ' + system_message.current_location }}
+    {%- endif -%}
+{%- endmacro -%}
+{#- Main Template Logic ================================================= -#}
+{#- Extract system message (only first message if it's system) -#}
+{%- set system_message = none -%}
+{%- set conversation_messages = messages -%}
+{%- if messages and messages[0].role == "system" -%}
+    {%- set system_message = messages[0] -%}
+    {%- set conversation_messages = messages[1:] -%}
+{%- endif -%}
+{#- Get the last user message turn, for interleaved thinking -#}
+{%- set ns = namespace(last_user_index=-1) %}
+{% for m in conversation_messages %}
+    {%- if m.role == 'user' %}
+        {% set ns.last_user_index = loop.index0 -%}
+    {%- endif %}
+{%- endfor %}
+{#- Render system message -#}
+{{- ']~!b[' ~ ']~b]system' ~ '\n' }}
+{{- build_system_message(system_message) }}
+{#- Render tools if available -#}
+{%- if tools -%}
+    {{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }}
+    {{- '\n' ~ '<tools>' ~ '\n' }}
+    {{- render_tool_namespace("functions", tools) }}
+    {{- '</tools>' ~ '\n\n' }}
+{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }}
+{{- '\n' ~ toolcall_begin_token }}
+<invoke name="tool-name-1">
+<parameter name="param-key-1">param-value-1</parameter>
+<parameter name="param-key-2">param-value-2</parameter>
+...
+</invoke>
+{{- '\n' ~ toolcall_end_token }}
+{%- endif -%}
+{{- '[e~[\n' }}
+
+{#- Render messages -#}
+{%- set last_tool_call = namespace(name=none) -%}
+{%- for message in conversation_messages -%}
+    {%- if message.role == 'assistant' -%}
+    {#- Only render reasoning_content if no user message follows -#}
+        {{- ']~b]ai' ~ '\n' }}
+
+        {%- set reasoning_content = '' %}
+        {%- set content = visible_text(message.content) %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {# Unsloth template fixes - must change to for loop since llama.cpp will error out if not #}
+                {%- set parts = content.split('</think>') %}
+                {%- for part in parts %}
+                    {%- if loop.index0 == 0 -%}
+                        {%- set reasoning_content = part.strip('\n') %}
+                        {%- set reasoning_content = (reasoning_content.split('<think>')|last) %}
+                        {%- set reasoning_content = reasoning_content.strip('\n') -%}
+                    {%- else -%}
+                        {%- set content = part.strip('\n') %}
+                    {%- endif %}
+                {%- endfor %}
+            {%- endif %}
+        {%- endif %}
+        {%- if reasoning_content and loop.index0 > ns.last_user_index -%}
+            {{- '<think>' ~ '\n' ~ reasoning_content ~ '\n' ~ '</think>' ~ '\n\n' }}
+        {%- endif -%}
+        {%- if content -%}
+            {{- content }}
+        {%- endif -%}
+        {%- if message.tool_calls -%}
+            {{- '\n' ~ toolcall_begin_token ~ '\n' }}
+
+            {%- for tool_call in message.tool_calls -%}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<invoke name="' + tool_call.name + '">\n' }}
+                {%- if tool_call.arguments is defined and tool_call.arguments is mapping -%}
+                    {% set _args = tool_call.arguments %}
+                    {%- for k, v in _args|items %}
+                        {{- '<parameter name="' + k + '">' }}
+                        {{- v | tojson | string if v is not string else v }}
+                        {{- '</parameter>' }}
+                    {% endfor %}{%- endif -%}
+                {{- '</invoke>' ~ '\n' }}
+            {%- endfor -%}
+
+            {{- toolcall_end_token}}
+            {%- set last_tool_call.name = message.tool_calls[-1].name -%}
+        {%- else -%}
+            {%- set last_tool_call.name = none -%}
+        {%- endif -%}
+        {{- '[e~[' ~ '\n' }}
+
+    {%- elif message.role == 'tool' -%}
+        {%- if last_tool_call.name is none -%}
+            {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
+        {%- endif -%}
+        {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}
+            {{- ']~b]tool' }}
+        {%- endif -%}
+        {%- if message.content is string -%}
+            {{- '<tool_response>\n' }}
+            {{- message.content }}
+            {{- '</tool_response>' }}
+        {%- else -%}
+            {%- for tr in message.content -%}
+                {{- '<tool_response>\n' }}
+                {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}
+                {{- '</tool_response>\n' }}
+            {%- endfor -%}
+        {%- endif -%}
+        {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
+            {{- '[e~[\n' -}}
+        {%- endif -%}
+
+    {%- elif message.role == 'user' -%}
+        {{- ']~b]user' ~ '\n' }}
+        {{- visible_text(message.content) }}
+        {{- '[e~[' ~ '\n' }}
+    {%- endif -%}
+{%- endfor -%}
+
+{#- Generation prompt -#}
+{%- if add_generation_prompt -%}
+{{- ']~b]ai' ~ '\n' ~ '<think>' ~ '\n' }}
+{%- endif -%}
+{# Copyright 2025-present Unsloth. Apache 2.0 License. #}
\ No newline at end of file
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 4a8ba849b3f8c..26c18fcfb02bf 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -75,6 +75,21 @@ static common_chat_msg normalize(const common_chat_msg & msg) {
     }
     return normalized;
 }
+
+
+// trim whitespace from the beginning and end of a string
+static std::string trim(const std::string & str) {
+    size_t start = 0;
+    size_t end = str.size();
+    while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
+        start += 1;
+    }
+    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
+        end -= 1;
+    }
+    return str.substr(start, end - start);
+}
+
 template <> bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
     return normalize(expected) == normalize(actual);
 }
@@ -148,15 +163,15 @@ static std::string renormalize_json(const std::string & json_str) {
         return json_str;
     }
 }
-static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual) {
+static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual, bool ignore_whitespace_differences = false) {
     assert_equals(expected.role, actual.role);
-    assert_equals(expected.content, actual.content);
+    assert_equals(expected.content, ignore_whitespace_differences ? trim(actual.content) : actual.content);
     assert_equals(expected.content_parts.size(), actual.content_parts.size());
     for (size_t i = 0; i < expected.content_parts.size(); i++) {
         const auto & expected_part = expected.content_parts[i];
         const auto & actual_part = actual.content_parts[i];
         assert_equals(expected_part.type, actual_part.type);
-        assert_equals(expected_part.text, actual_part.text);
+        assert_equals(expected_part.text, ignore_whitespace_differences ? trim(actual_part.text) : actual_part.text);
     }
     assert_equals(expected.reasoning_content, actual.reasoning_content);
     assert_equals(expected.tool_calls.size(), actual.tool_calls.size());
@@ -183,6 +198,24 @@ common_chat_tool special_function_tool {
         "required": ["arg1"]
     })",
 };
+common_chat_tool special_function_tool_with_optional_param {
+    /* .name = */ "special_function_with_opt",
+    /* .description = */ "I'm special but have optional stuff",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "arg1": {
+                "type": "integer",
+                "description": "The arg."
+            },
+            "arg2": {
+                "type": "integer",
+                "description": "The optional arg."
+            }
+        },
+        "required": ["arg1"]
+    })",
+};
 common_chat_tool python_tool {
     /* .name = */ "python",
     /* .description = */ "an ipython interpreter",
@@ -211,7 +244,7 @@ common_chat_tool code_interpreter_tool {
         "required": ["code"]
     })",
 };
-std::vector<common_chat_tool> tools { special_function_tool, python_tool };
+std::vector<common_chat_tool> tools { special_function_tool, special_function_tool_with_optional_param, python_tool };
 std::vector<common_chat_tool> llama_3_1_tools { special_function_tool, code_interpreter_tool };
 
 struct delta_data {
@@ -280,7 +313,9 @@ static void test_templates(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
                            const std::string & expected_delta = "",
                            bool expect_grammar_triggered = true,
                            bool test_grammar_if_triggered = true,
-                           common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE) {
+                           common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
+                           bool ignore_whitespace_differences = false
+                           ) {
     common_chat_msg user_message;
     user_message.role = "user";
     user_message.content = "Hello, world!";
@@ -288,6 +323,9 @@ static void test_templates(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
     for (const auto & tool_choice : std::vector<common_chat_tool_choice> {COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED}) {
         auto data = init_delta(tmpls, end_tokens, user_message, test_message, tools, tool_choice);
         if (!expected_delta.empty()) {
+            if (ignore_whitespace_differences) {
+                data.delta = trim(data.delta);
+            }
             assert_equals(expected_delta, data.delta);
         }
@@ -296,7 +334,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
         syntax.format = data.params.format;
         syntax.reasoning_format = reasoning_format;
         const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, syntax);
-        assert_msg_equals(test_message, msg);
+        assert_msg_equals(test_message, msg, ignore_whitespace_differences);
     }
 
     if (!test_message.tool_calls.empty()) {
@@ -417,6 +455,8 @@ const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
 const common_chat_msg message_assist_thoughts_unopened_unparsed = simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
 const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking");
 const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_noopt = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_withopt = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
 const common_chat_msg message_assist_call_content = simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
 const common_chat_msg message_assist_call_empty_args = simple_assist_msg("", "", "special_function");
 const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg");
@@ -2288,6 +2328,96 @@ Hey there!<|im_end|>
         // above verify edge cases and format variations for the tool call output format.
     }
 
+    {
+        auto tmpls = read_templates("models/templates/unsloth-MiniMax-M2.jinja");
+        std::vector<std::string> end_tokens{ "[e~[" };
+
+        assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+
+        // Test parsing regular content
+        assert_msg_equals(message_assist,
+            common_chat_parse(
+                "Hello, world!\nWhat's up?",
+                /* is_partial= */ false,
+                {COMMON_CHAT_FORMAT_MINIMAX_M2}));
+
+        // Test parsing content with thinking
+        assert_msg_equals(message_assist_thoughts,
+            common_chat_parse(
+                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
+                /* is_partial= */ false,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                }));
+
+        // Test parsing tool calls
+        assert_msg_equals(message_assist_call,
+            common_chat_parse(
+                "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
+                /* is_partial= */ false,
+                {COMMON_CHAT_FORMAT_MINIMAX_M2}));
+
+        // Test parsing tool calls with thinking
+        assert_msg_equals(message_assist_call_thoughts,
+            common_chat_parse(
+                "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
+                /* is_partial= */ false,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
+                }));
+
+        // Test tool calls with extra content
+        assert_msg_equals(message_assist_call_content,
+            common_chat_parse(
+                "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
+                /* is_partial= */ false,
+                {COMMON_CHAT_FORMAT_MINIMAX_M2}
+            ));
+
+        // Test tool calls with extra content AND thinking
+        assert_msg_equals(message_assist_call_thoughts_content,
+            common_chat_parse(
+                "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
+                /* is_partial= */ false,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
+                }));
+
+        // Test template generation for regular content
+        test_templates(tmpls.get(), end_tokens, message_assist, tools,
+            "Hello, world!\nWhat's up?",
+            /* expect_grammar_triggered= */ false);
+
+        // Test template generation for tool calls
+        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
+            "<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
+            /* expect_grammar_triggered= */ true,
+            /* test_grammar_if_triggered= */ true,
+            /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
+            /* ignore_whitespace_differences= */ true
+        );
+
+        // Test template generation for tools with optional parameters
+        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
+            "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
+            /* expect_grammar_triggered= */ true,
+            /* test_grammar_if_triggered= */ true,
+            /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
+            /* ignore_whitespace_differences= */ true
+        );
+        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
+            "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
+            /* expect_grammar_triggered= */ true,
+            /* test_grammar_if_triggered= */ true,
+            /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
+            /* ignore_whitespace_differences= */ true
+        );
+    }
 }
 
 static void test_msg_diffs_compute() {