diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
index b3362519a68f3..c0bcd93ef5955 100644
--- a/common/chat-parser.cpp
+++ b/common/chat-parser.cpp
@@ -6,6 +6,8 @@
#include
#include
#include
+#include
+#include
#include
using json = nlohmann::ordered_json;
@@ -420,3 +422,590 @@ std::optional common_chat_msg_parse
void common_chat_msg_parser::clear_tools() {
result_.tool_calls.clear();
}
+
+// Qwen3-Coder XML tool call parser implementation
+namespace {
+ // Hard limits applied to model output before/while parsing (DoS protection)
+ static constexpr size_t MAX_INPUT_SIZE = 1024 * 1024; // Max bytes of text accepted per parse/search call (1 MiB)
+ static constexpr size_t MAX_PARAMETER_COUNT = 100; // Cap on tags collected per search; also reused as the cap on tools.size()
+ static constexpr size_t MAX_TAG_NAME_LENGTH = 256; // Max tag name length; also bounds function and parameter names
+ static constexpr size_t MAX_ATTRIBUTE_LENGTH = 1024; // Max length of the value after '=' in an opening tag
+
+ // Helper function to set error details
+ void set_error(common_chat_msg_parser::XmlParseError & error,
+ common_chat_msg_parser::XmlParseErrorType type,
+ size_t position,
+ const std::string & context,
+ const std::string & message) {
+ error.type = type;
+ error.position = position;
+ error.context = context;
+ error.message = message;
+ }
+
+ // One element located by find_xml_tag(): opening tag (optionally "name=attribute"), content, closing tag
+ struct XmlTag {
+ std::string name; // tag name; only filled in once the closing tag was found
+ std::string attribute; // text after '=' in the opening tag (surrounding quotes removed), empty if none
+ std::string content; // text between the opening tag's '>' and the closing tag
+ size_t start_pos = 0; // offset of the opening tag in the searched text
+ size_t end_pos = 0; // offset just past the closing tag; stays 0 when no closing tag was found
+ };
+
+    // Find the first <tag_name> or <tag_name=attr> element at or after start_pos (iterative, no recursion).
+    // Returns std::nullopt when no opening tag is present, when the opening tag has no '>', or when a
+    // size limit is exceeded; only the limit cases fill *error (if error is non-null).
+    // When the opening tag is found but </tag_name> is not, the tag is returned with end_pos == 0 and
+    // empty name/content so that callers can recognise an unterminated element.
+    std::optional<XmlTag> find_xml_tag(std::string_view text, std::string_view tag_name, size_t start_pos = 0,
+                                       common_chat_msg_parser::XmlParseError * error = nullptr) {
+        // Input validation for DoS protection
+        if (text.size() > MAX_INPUT_SIZE) {
+            LOG_DBG("XML input too large: %zu bytes (max: %zu)\n", text.size(), MAX_INPUT_SIZE);
+            if (error) {
+                set_error(*error, common_chat_msg_parser::XmlParseErrorType::INPUT_TOO_LARGE, 0,
+                          std::string(text.substr(0, std::min(text.size(), size_t(100)))),
+                          "XML input exceeds maximum size limit of " + std::to_string(MAX_INPUT_SIZE) + " bytes");
+            }
+            return std::nullopt;
+        }
+
+        if (tag_name.size() > MAX_TAG_NAME_LENGTH) {
+            LOG_DBG("Tag name too long: %zu chars (max: %zu)\n", tag_name.size(), MAX_TAG_NAME_LENGTH);
+            if (error) {
+                set_error(*error, common_chat_msg_parser::XmlParseErrorType::TAG_NAME_TOO_LONG, 0,
+                          std::string(tag_name),
+                          "Tag name exceeds maximum length of " + std::to_string(MAX_TAG_NAME_LENGTH) + " characters");
+            }
+            return std::nullopt;
+        }
+
+        if (start_pos >= text.size()) {
+            return std::nullopt;
+        }
+
+        // Pre-compute the literal prefix of the opening tag and the full closing tag
+        const std::string open_tag_start = std::string("<") + std::string(tag_name);
+        const std::string close_tag = std::string("</") + std::string(tag_name) + ">";
+
+        size_t search_pos = start_pos;
+        while (search_pos < text.size()) {
+            // Look for opening tag
+            size_t open_pos = text.find(open_tag_start, search_pos);
+            if (open_pos == std::string::npos) {
+                return std::nullopt;
+            }
+
+            // The character after the tag name must be '>', '=' or whitespace, otherwise it is a longer name
+            size_t check_pos = open_pos + open_tag_start.length();
+            if (check_pos < text.size()) {
+                // std::isspace() is only defined for unsigned char values (or EOF), so convert before calling
+                const unsigned char next_char = static_cast<unsigned char>(text[check_pos]);
+                if (next_char != '>' && next_char != '=' && !std::isspace(next_char)) {
+                    // False match (e.g., looking for "tool" but found "tool_call"): resume one byte further
+                    search_pos = open_pos + 1;
+                    continue;
+                }
+            }
+
+            // Find the end of the opening tag
+            size_t open_end = text.find('>', open_pos);
+            if (open_end == std::string::npos) {
+                return std::nullopt;
+            }
+
+            XmlTag tag;
+            tag.start_pos = open_pos;
+
+            // Extract attribute if present (for tags like <function=name> or <parameter=name>)
+            size_t tag_content_start = open_pos + 1 + tag_name.length();
+            if (tag_content_start < open_end) {
+                // Look for '=' in the tag content
+                size_t eq_pos = text.find('=', tag_content_start);
+                if (eq_pos != std::string::npos && eq_pos < open_end) {
+                    // Skip whitespace after '='
+                    size_t attr_start = eq_pos + 1;
+                    while (attr_start < open_end && std::isspace(static_cast<unsigned char>(text[attr_start]))) {
+                        attr_start++;
+                    }
+
+                    if (attr_start < open_end) {
+                        size_t attr_end = open_end;
+
+                        // Handle quoted attribute values
+                        if (text[attr_start] == '"' || text[attr_start] == '\'') {
+                            char quote_char = text[attr_start];
+                            attr_start++; // Skip opening quote
+
+                            // Find closing quote
+                            size_t quote_end = text.find(quote_char, attr_start);
+                            if (quote_end != std::string::npos && quote_end < open_end) {
+                                attr_end = quote_end;
+                            } else {
+                                // No closing quote found, treat as unquoted
+                                attr_start--; // Go back to include the quote
+                            }
+                        } else {
+                            // Unquoted attribute - trim trailing whitespace
+                            while (attr_end > attr_start && std::isspace(static_cast<unsigned char>(text[attr_end - 1]))) {
+                                attr_end--;
+                            }
+                        }
+
+                        if (attr_start < attr_end) {
+                            std::string_view attr_view = text.substr(attr_start, attr_end - attr_start);
+                            // Validate attribute length
+                            if (attr_view.size() <= MAX_ATTRIBUTE_LENGTH) {
+                                tag.attribute = std::string(attr_view);
+                            } else {
+                                LOG_DBG("Attribute too long: %zu chars (max: %zu)\n", attr_view.size(), MAX_ATTRIBUTE_LENGTH);
+                                if (error) {
+                                    set_error(*error, common_chat_msg_parser::XmlParseErrorType::ATTRIBUTE_TOO_LONG,
+                                              open_pos, std::string(attr_view.substr(0, 100)),
+                                              "Attribute exceeds maximum length of " + std::to_string(MAX_ATTRIBUTE_LENGTH) + " characters");
+                                }
+                                return std::nullopt;
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Look for the closing tag, starting right after the opening tag
+            size_t close_pos = text.find(close_tag, open_end + 1);
+            if (close_pos == std::string::npos) {
+                return tag; // unterminated: start_pos/attribute are set, end_pos stays 0
+            }
+
+            tag.end_pos = close_pos + close_tag.length();
+            tag.name = std::string(tag_name);
+
+            // Content is everything between the opening tag's '>' and the closing tag
+            size_t content_start = open_end + 1;
+            size_t content_length = close_pos - content_start;
+            if (content_length > 0) {
+                std::string_view content_view = text.substr(content_start, content_length);
+                tag.content = std::string(content_view);
+            }
+
+            return tag;
+        }
+
+        return std::nullopt;
+    }
+
+    // Collect successive <tag_name> elements of `text` in order, at most MAX_PARAMETER_COUNT of them; hitting the
+    // cap is reported via *error (TOO_MANY_PARAMETERS). An unterminated element ends the scan and is not returned.
+    std::vector<XmlTag> find_all_xml_tags(std::string_view text, std::string_view tag_name,
+                                          common_chat_msg_parser::XmlParseError * error = nullptr) {
+        std::vector<XmlTag> tags;
+        size_t pos = 0;
+        size_t tag_count = 0;
+
+        while (pos < text.length() && tag_count < MAX_PARAMETER_COUNT) {
+            auto tag = find_xml_tag(text, tag_name, pos, error);
+            // An unterminated tag has end_pos == 0: stop instead of rescanning it from the start until the cap is hit
+            if (!tag || tag->end_pos <= pos) {
+                break;
+            }
+            tags.push_back(*tag);
+            pos = tag->end_pos;
+            ++tag_count;
+        }
+
+        if (tag_count >= MAX_PARAMETER_COUNT) {
+            LOG_DBG("Too many tags found: %zu (max: %zu)\n", tag_count, MAX_PARAMETER_COUNT);
+            if (error) {
+                set_error(*error, common_chat_msg_parser::XmlParseErrorType::TOO_MANY_PARAMETERS, pos,
+                          std::string(text.substr(pos, std::min(text.size() - pos, size_t(100)))),
+                          "Too many " + std::string(tag_name) + " tags found (max: " + std::to_string(MAX_PARAMETER_COUNT) + ")");
+            }
+        }
+
+        return tags;
+    }
+
+    // Return a copy of `str` without leading/trailing spaces, tabs, LF or CR ("" when nothing else remains)
+    std::string trim_whitespace(std::string_view str) {
+        constexpr std::string_view blanks = " \t\n\r";
+        const size_t first = str.find_first_not_of(blanks);
+        if (first == std::string_view::npos) { return std::string(); }
+        // A non-blank character exists, so the backward search cannot fail and last >= first
+        const size_t last = str.find_last_not_of(blanks);
+        return std::string(str.data() + first, last - first + 1);
+    }
+
+    // Parse ALL of `str` as a base-10 int. Returns false and leaves `result` untouched on an empty or
+    // non-numeric string, on trailing characters ("12abc"), or when the value does not fit in an int.
+    bool safe_parse_int(std::string_view str, int & result) {
+        try {
+            size_t consumed = 0; // stoll stops at the first non-digit; a partial match must not count as success
+            const long long temp = std::stoll(std::string(str), &consumed); // stoll requires std::string
+            if (consumed != str.size() || temp > std::numeric_limits<int>::max() || temp < std::numeric_limits<int>::min()) {
+                return false; // trailing characters, or out of int range
+            }
+            result = static_cast<int>(temp);
+            return true;
+        } catch (const std::exception &) { // std::invalid_argument / std::out_of_range from stoll
+            return false;
+        }
+    }
+
+    // Parse ALL of `str` as a finite float; false (result untouched) on junk, trailing characters, NaN/Inf or overflow
+    bool safe_parse_float(std::string_view str, float & result) {
+        try {
+            size_t consumed = 0; // stod stops at the first invalid character, so "1.5x" must not count as success
+            const double temp = std::stod(std::string(str), &consumed); // stod requires std::string
+            if (consumed != str.size() || !(temp <= std::numeric_limits<float>::max() && temp >= std::numeric_limits<float>::lowest())) {
+                return false; // trailing characters, out of float range, or NaN (every comparison with NaN is false)
+            }
+            result = static_cast<float>(temp);
+            return true;
+        } catch (const std::exception &) { // std::invalid_argument / std::out_of_range from stod
+            return false;
+        }
+    }
+
+    // Convert the raw text of one <parameter> into a JSON literal, returned as a string.
+    // "null" maps to null. If param_config[param_name] has a string "type", a value that fails to parse as
+    // that type is emitted as a JSON string (booleans: anything other than "true"/"false" becomes false).
+    // For an array "type", candidates are tried as object/array, integer, number, boolean, string.
+    // Otherwise (or if nothing matched) the value is tried as JSON, int, float, boolean, then string.
+    std::string convert_qwen3_param_value(std::string_view param_value,
+                                          std::string_view param_name,
+                                          const nlohmann::json & param_config,
+                                          std::string_view /* func_name */) {
+        std::string trimmed_value = trim_whitespace(param_value);
+
+        // Handle null value
+        if (trimmed_value == "null") {
+            return "null";
+        }
+
+        // If we have schema information, use it
+        if (param_config.contains(param_name)) {
+            const auto & schema = param_config.at(std::string(param_name));
+            if (schema.contains("type")) {
+                const auto & t = schema.at("type");
+                // Handle union types like ["number","null"]
+                if (t.is_array()) {
+                    std::vector<std::string> types;
+                    for (const auto & tv : t) {
+                        if (tv.is_string()) {
+                            types.push_back((std::string) tv);
+                        }
+                    }
+                    auto list_contains = [&](const char * s) {
+                        for (const auto & x : types) {
+                            if (x == s) return true;
+                        }
+                        return false;
+                    };
+                    auto has = [&](std::string_view ty) {
+                        for (const auto & s : types) {
+                            if (s == ty) return true;
+                        }
+                        // Back-compat synonyms
+                        if (ty == "string") return list_contains("str") || list_contains("text");
+                        if (ty == "integer") return list_contains("int");
+                        if (ty == "number") return list_contains("float");
+                        if (ty == "boolean") return list_contains("bool");
+                        return false;
+                    };
+                    if (has("null") && trimmed_value == "null") {
+                        return "null";
+                    }
+                    if (has("object") || has("array")) {
+                        try {
+                            auto parsed = json::parse(trimmed_value);
+                            return parsed.dump();
+                        } catch (...) {
+                            return json(trimmed_value).dump();
+                        }
+                    }
+                    if (has("integer")) {
+                        int int_val;
+                        if (safe_parse_int(trimmed_value, int_val)) {
+                            return std::to_string(int_val);
+                        }
+                        // if integer parse fails, try number or fall through
+                    }
+                    if (has("number")) {
+                        float float_val;
+                        if (safe_parse_float(trimmed_value, float_val)) {
+                            return std::to_string(float_val); // NOTE(review): fixed 6 decimals, see single-type case
+                        }
+                    }
+                    if (has("boolean")) {
+                        if (trimmed_value == "true" || trimmed_value == "false") {
+                            return trimmed_value;
+                        }
+                        return "false";
+                    }
+                    if (has("string")) {
+                        return json(trimmed_value).dump();
+                    }
+                    // Unknown union types: fall through to generic inference below
+                } else if (t.is_string()) {
+                    std::string param_type = t;
+                    // Convert based on type; every string result goes through json(...).dump() for escaping
+                    if (param_type == "string" || param_type == "str" || param_type == "text") {
+                        return json(trimmed_value).dump();
+                    } else if (param_type == "integer" || param_type == "int") {
+                        int int_val;
+                        if (safe_parse_int(trimmed_value, int_val)) {
+                            return std::to_string(int_val);
+                        } else {
+                            return json(trimmed_value).dump();
+                        }
+                    } else if (param_type == "number" || param_type == "float") {
+                        float float_val;
+                        if (safe_parse_float(trimmed_value, float_val)) {
+                            return std::to_string(float_val); // NOTE(review): to_string prints 6 decimals ("1e-7" -> "0.000000")
+                        } else {
+                            return json(trimmed_value).dump();
+                        }
+                    } else if (param_type == "boolean" || param_type == "bool") {
+                        if (trimmed_value == "true" || trimmed_value == "false") {
+                            return trimmed_value;
+                        }
+                        return "false";
+                    } else if (param_type == "object" || param_type == "array") {
+                        try {
+                            auto parsed = json::parse(trimmed_value);
+                            return parsed.dump();
+                        } catch (...) {
+                            return json(trimmed_value).dump();
+                        }
+                    }
+                }
+                // If schema.type exists but is not string/array, fall through
+            }
+        }
+
+        // Without schema, try to infer type from value
+        // First check if it's valid JSON (any JSON value, including numbers and quoted strings)
+        try {
+            auto parsed_json = json::parse(trimmed_value);
+            return parsed_json.dump(); // It's valid JSON, return it re-serialized
+        } catch (...) {
+            // Not valid JSON, continue with other type checks
+        }
+
+        // Check if it's a number
+        int int_val;
+        if (safe_parse_int(trimmed_value, int_val)) {
+            return std::to_string(int_val); // It's an integer
+        }
+
+        float float_val;
+        if (safe_parse_float(trimmed_value, float_val)) {
+            return std::to_string(float_val); // It's a float
+        }
+
+        // Check if it's a boolean
+        if (trimmed_value == "true" || trimmed_value == "false") {
+            return trimmed_value;
+        }
+
+        // Default to string, escaped by the JSON library
+        return json(trimmed_value).dump();
+    }
+
+    // Return the parameter schema map of the tool named func_name, or an empty object if there is none
+    nlohmann::json get_param_config(std::string_view func_name,
+                                    const std::vector<common_chat_tool> & tools) {
+        for (const auto & tool : tools) {
+            if (tool.name == func_name) {
+                try {
+                    auto params = json::parse(tool.parameters); // tool.parameters holds the schema as JSON text
+                    if (params.contains("properties")) {
+                        return params["properties"]; // usual case: per-parameter schemas keyed by name
+                    }
+                    return params; // no "properties" member: hand back the parsed schema itself
+                } catch (...) {
+                    return json::object(); // unparsable schema: behave as if no type information exists
+                }
+            }
+        }
+        return json::object();
+    }
+}
+
+bool common_chat_msg_parser::parse_qwen3_xml_tool_call(const std::string & content,
+ const std::vector & tools) {
+ XmlParseError error;
+ bool result = parse_qwen3_xml_tool_call(content, tools, error);
+ last_xml_error_ = error;
+ return result;
+}
+
+// Parse the first <tool_call> element of `content` (Qwen3-Coder XML) and register it via add_tool_call().
+// Returns true when a tool call was added, or when <tool_call> was opened but never closed; the text
+// before <tool_call> is added as content only in those cases, so a caller that falls back to adding the
+// whole `content` after a false return does not duplicate it. Failures detected here are described in
+// `error`. A non-empty `tools` list restricts the accepted function names and supplies parameter types.
+bool common_chat_msg_parser::parse_qwen3_xml_tool_call(const std::string & content,
+                                                       const std::vector<common_chat_tool> & tools,
+                                                       XmlParseError & error) {
+    error.clear();
+
+    // Input validation for DoS protection
+    if (content.size() > MAX_INPUT_SIZE) {
+        LOG_DBG("XML content too large: %zu bytes (max: %zu)\n", content.size(), MAX_INPUT_SIZE);
+        set_error(error, XmlParseErrorType::INPUT_TOO_LARGE, 0,
+                  content.substr(0, std::min(content.size(), size_t(100))),
+                  "XML content exceeds maximum size limit of " + std::to_string(MAX_INPUT_SIZE) + " bytes");
+        return false;
+    }
+
+    // Validate tools vector size
+    if (tools.size() > MAX_PARAMETER_COUNT) {
+        LOG_DBG("Too many tools provided: %zu (max: %zu)\n", tools.size(), MAX_PARAMETER_COUNT);
+        set_error(error, XmlParseErrorType::TOO_MANY_TOOLS, 0, "",
+                  "Too many tools provided: " + std::to_string(tools.size()) + " (max: " + std::to_string(MAX_PARAMETER_COUNT) + ")");
+        return false;
+    }
+
+    // Names of the known tools for hashed lookup; stays empty when no tools were supplied
+    std::unordered_set<std::string> valid_functions;
+    if (!tools.empty()) {
+        valid_functions.reserve(tools.size());
+        for (const auto & tool : tools) {
+            valid_functions.insert(tool.name);
+        }
+    }
+
+    std::string_view content_view(content);
+
+    // Find tool_call tag
+    auto tool_call_tag = find_xml_tag(content_view, "tool_call", 0, &error);
+    if (!tool_call_tag) {
+        if (!error.has_error()) {
+            set_error(error, XmlParseErrorType::INVALID_XML_STRUCTURE, 0, content.substr(0, std::min(content.size(), size_t(100))),
+                      "No valid <tool_call> tag found in content");
+        }
+        return false;
+    }
+
+    // Text before <tool_call>, kept verbatim (whitespace may be significant). It is committed only on the
+    // success paths below; adding it before a later failure would duplicate it in the caller's fallback.
+    const std::string content_before = content.substr(0, tool_call_tag->start_pos);
+    const auto commit_content_before = [&]() {
+        if (!content_before.empty()) {
+            add_content(content_before);
+        }
+    };
+
+    if (!tool_call_tag->end_pos) {
+        commit_content_before();
+        return true; // unterminated <tool_call>: only the leading text is reported, and it is not an error
+    }
+
+    // Find function tag within tool_call
+    std::string_view tool_call_content_view(tool_call_tag->content);
+    auto function_tag = find_xml_tag(tool_call_content_view, "function", 0, &error);
+    if (!function_tag || function_tag->attribute.empty()) {
+        LOG_DBG("Invalid or missing function tag in tool_call\n");
+        if (!error.has_error()) {
+            set_error(error, XmlParseErrorType::INVALID_XML_STRUCTURE, tool_call_tag->start_pos,
+                      tool_call_tag->content.substr(0, std::min(tool_call_tag->content.size(), size_t(100))),
+                      "Invalid or missing <function> tag with attribute in <tool_call>");
+        }
+        return false;
+    }
+
+    std::string function_name = trim_whitespace(function_tag->attribute);
+
+    if (function_name.empty() || function_name.size() > MAX_TAG_NAME_LENGTH) {
+        LOG_DBG("Invalid function name: '%s' (length: %zu, max: %zu)\n",
+                function_name.c_str(), function_name.size(), MAX_TAG_NAME_LENGTH);
+        set_error(error, XmlParseErrorType::INVALID_FUNCTION_NAME,
+                  tool_call_tag->start_pos + function_tag->start_pos,
+                  function_name,
+                  "Invalid function name: '" + function_name + "' (length: " + std::to_string(function_name.size()) + ", max: " + std::to_string(MAX_TAG_NAME_LENGTH) + ")");
+        return false;
+    }
+
+    if (!tools.empty() && valid_functions.find(function_name) == valid_functions.end()) {
+        LOG_DBG("Function '%s' not found in available tools\n", function_name.c_str());
+        set_error(error, XmlParseErrorType::FUNCTION_NOT_FOUND,
+                  tool_call_tag->start_pos + function_tag->start_pos,
+                  function_name,
+                  "Function '" + function_name + "' not found in available tools");
+        return false;
+    }
+
+    auto param_config = get_param_config(std::string_view(function_name), tools);
+
+    // Parse parameters within function tag; any error raised while scanning them aborts the call
+    json arguments = json::object();
+    std::string_view function_content_view(function_tag->content);
+    auto parameter_tags = find_all_xml_tags(function_content_view, "parameter", &error);
+
+    if (error.has_error()) {
+        return false;
+    }
+
+    // Limit parameter count for DoS protection
+    size_t param_count = 0;
+    for (const auto & param_tag : parameter_tags) {
+        if (param_count >= MAX_PARAMETER_COUNT) {
+            LOG_DBG("Too many parameters for function '%s': %zu (max: %zu)\n",
+                    function_name.c_str(), param_count, MAX_PARAMETER_COUNT);
+            set_error(error, XmlParseErrorType::TOO_MANY_PARAMETERS,
+                      tool_call_tag->start_pos + function_tag->start_pos,
+                      function_name,
+                      "Too many parameters for function '" + function_name + "': " + std::to_string(param_count) + " (max: " + std::to_string(MAX_PARAMETER_COUNT) + ")");
+            break;
+        }
+
+        if (param_tag.attribute.empty()) {
+            LOG_DBG("Skipping parameter with empty attribute\n");
+            continue; // Skip malformed parameter tags
+        }
+
+        std::string param_name = trim_whitespace(param_tag.attribute);
+        std::string param_value = param_tag.content;
+
+        if (param_name.empty() || param_name.size() > MAX_TAG_NAME_LENGTH) {
+            LOG_DBG("Invalid parameter name: '%s' (length: %zu, max: %zu)\n",
+                    param_name.c_str(), param_name.size(), MAX_TAG_NAME_LENGTH);
+            continue;
+        }
+
+        // Convert value based on schema type; a failure is recorded in `error` but does not abort the call
+        try {
+            std::string converted_value = convert_qwen3_param_value(
+                std::string_view(param_value),
+                std::string_view(param_name),
+                param_config,
+                std::string_view(function_name)
+            );
+            arguments[param_name] = json::parse(converted_value);
+            ++param_count;
+        } catch (const std::exception & e) {
+            LOG_DBG("Failed to convert parameter '%s': %s, using raw value\n", param_name.c_str(), e.what());
+            set_error(error, XmlParseErrorType::PARAMETER_CONVERSION_FAILED,
+                      tool_call_tag->start_pos + function_tag->start_pos + param_tag.start_pos,
+                      param_name + "=" + param_value,
+                      "Failed to convert parameter '" + param_name + "': " + e.what());
+            // Fallback to trimmed raw value, stored as a JSON string
+            arguments[param_name] = trim_whitespace(param_value);
+            ++param_count;
+        }
+    }
+
+    // Add the tool call; the leading text is committed only once the call has been accepted
+    try {
+        const bool added = add_tool_call(function_name, "", arguments.dump());
+        if (added) { commit_content_before(); }
+        return added;
+    } catch (const std::exception & e) {
+        LOG_DBG("Failed to serialize arguments for function '%s': %s\n", function_name.c_str(), e.what());
+        set_error(error, XmlParseErrorType::JSON_SERIALIZATION_FAILED,
+                  tool_call_tag->start_pos,
+                  function_name,
+                  "Failed to serialize arguments for function '" + function_name + "': " + e.what());
+        return false;
+    }
+}
+
diff --git a/common/chat-parser.h b/common/chat-parser.h
index c8cdc63fb50f6..81decdf9bf3fc 100644
--- a/common/chat-parser.h
+++ b/common/chat-parser.h
@@ -8,6 +8,7 @@
#include
#include
+#include
#include
class common_chat_msg_partial_exception : public std::runtime_error {
@@ -120,4 +121,45 @@ class common_chat_msg_parser {
);
void clear_tools();
+
+ // Failure categories reported by the Qwen3-Coder XML tool call parser
+ enum class XmlParseErrorType {
+ NONE, // no error recorded (XmlParseError::has_error() is false)
+ INPUT_TOO_LARGE, // input text exceeds the parser's size limit
+ TAG_NAME_TOO_LONG, // the tag name being searched for exceeds the length limit
+ ATTRIBUTE_TOO_LONG, // the value after '=' in an opening tag exceeds the length limit
+ TOO_MANY_PARAMETERS, // the number of tags/parameters reached the parser's cap
+ TOO_MANY_TOOLS, // the supplied tools list is larger than the parser's cap
+ INVALID_XML_STRUCTURE, // no tool_call tag, or its function tag is missing or has no attribute
+ FUNCTION_NOT_FOUND, // function name is not in the (non-empty) tools list
+ INVALID_FUNCTION_NAME, // function name is empty or too long
+ PARAMETER_CONVERSION_FAILED, // a parameter value could not be converted; its trimmed raw text is used instead
+ JSON_SERIALIZATION_FAILED // an exception was thrown while serializing the arguments / adding the tool call
+ };
+
+ struct XmlParseError { // details of the most recent failure; type == NONE means "no error"
+ XmlParseErrorType type = XmlParseErrorType::NONE; // failure category
+ size_t position = 0; // offset associated with the failure (0 when not applicable)
+ std::string context; // short excerpt or name related to the failure (may be empty)
+ std::string message; // human-readable description
+
+ bool has_error() const { return type != XmlParseErrorType::NONE; } // true once any failure was recorded
+ void clear() { // reset every field to its default ("no error") value
+ type = XmlParseErrorType::NONE;
+ position = 0;
+ context.clear();
+ message.clear();
+ }
+ };
+
+ // Qwen3-Coder XML tool call parser with error reporting
+ bool parse_qwen3_xml_tool_call(const std::string & content, const std::vector & tools);
+ bool parse_qwen3_xml_tool_call(const std::string & content, const std::vector & tools, XmlParseError & error);
+
+ // Get last parse error
+ const XmlParseError & get_last_xml_parse_error() const { return last_xml_error_; }
+
+private:
+ XmlParseError last_xml_error_;
};
+
diff --git a/common/chat.cpp b/common/chat.cpp
index 87212322ec248..9bb29a33da30d 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -10,6 +10,7 @@
#include
#include
+#include
#include
#include
#include
@@ -639,6 +640,7 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
+ case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder XML";
default:
throw std::runtime_error("Unknown chat format");
}
@@ -2397,6 +2399,7 @@ static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
// Parse thinking tags
builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
+
if (!builder.syntax().parse_tool_calls) {
builder.add_content(builder.consume_rest());
return;
@@ -2425,6 +2428,188 @@ static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
builder.add_content(builder.consume_rest());
}
+static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & inputs) {
+ common_chat_params data;
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+ // Always set the format to QWEN3_CODER_XML regardless of whether tools are provided
+ // The format identifies the template type, not the runtime configuration
+ data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
+
+ if (!inputs.tools.empty()) {
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+ std::vector tool_rules;
+
+ auto not_parameter_end = builder.add_rule("not_parameter_end", "([^<] | (\"<\" [^/]) | (\"\" [^p]) | (\"
]))*");
+
+ foreach_function(inputs.tools, [&](const json & tool) {
+ const auto & function = tool.at("function");
+ const std::string & name = function.at("name");
+ auto parameters = function.at("parameters");
+ builder.resolve_refs(parameters);
+
+ std::unordered_set required;
+ if (parameters.contains("required")) {
+ for (const auto & p : parameters.at("required")) {
+ required.insert(p);
+ }
+ }
+
+ // Build parameter rules for XML format
+ std::vector param_rules;
+ if (parameters.contains("properties")) {
+ for (const auto & [param_name, param_schema] : parameters["properties"].items()) {
+ std::string param_rule = "\"\" space ";
+
+ // Add parameter value based on type (supports unions and anyOf/oneOf; sanitize unsupported {"not":{}} branches)
+ auto schema_local = param_schema;
+
+ // Recursively remove entries like {"not":{}} inside anyOf/oneOf that json-schema-to-grammar doesn't support
+ std::function sanitize = [&](json &s) {
+ if (s.is_object()) {
+ if (s.contains("anyOf") && s["anyOf"].is_array()) {
+ json filtered = json::array();
+ for (auto v : s["anyOf"]) {
+ if (v.is_object() && v.contains("not") && v["not"].is_object() && v["not"].empty()) {
+ continue;
+ }
+ sanitize(v);
+ filtered.push_back(v);
+ }
+ s["anyOf"] = filtered;
+ if (s["anyOf"].size() == 1) {
+ json single = s["anyOf"][0];
+ s.erase("anyOf");
+ for (auto it = single.begin(); it != single.end(); ++it) {
+ s[it.key()] = it.value();
+ }
+ }
+ }
+ if (s.contains("oneOf") && s["oneOf"].is_array()) {
+ json filtered = json::array();
+ for (auto v : s["oneOf"]) {
+ if (v.is_object() && v.contains("not") && v["not"].is_object() && v["not"].empty()) {
+ continue;
+ }
+ sanitize(v);
+ filtered.push_back(v);
+ }
+ s["oneOf"] = filtered;
+ if (s["oneOf"].size() == 1) {
+ json single = s["oneOf"][0];
+ s.erase("oneOf");
+ for (auto it = single.begin(); it != single.end(); ++it) {
+ s[it.key()] = it.value();
+ }
+ }
+ }
+ for (auto it = s.begin(); it != s.end(); ++it) {
+ sanitize(it.value());
+ }
+ } else if (s.is_array()) {
+ for (auto & v : s) sanitize(v);
+ }
+ };
+ sanitize(schema_local);
+
+ // Determine if schema allows a plain string (so we can accept unquoted text content in XML)
+ std::function allows_string = [&](const json & sch) -> bool {
+ if (!sch.is_object()) return false;
+ if (sch.contains("type")) {
+ const auto & t = sch.at("type");
+ if (t.is_string()) {
+ std::string ts = t;
+ return ts == "string" || ts == "text" || ts == "str";
+ }
+ if (t.is_array()) {
+ for (const auto & tv : t) {
+ if (tv.is_string() && (tv == "string" || tv == "text" || tv == "str")) {
+ return true;
+ }
+ }
+ }
+ }
+ if (sch.contains("anyOf") && sch["anyOf"].is_array()) {
+ for (const auto & v : sch["anyOf"]) {
+ if (allows_string(v)) return true;
+ }
+ }
+ if (sch.contains("oneOf") && sch["oneOf"].is_array()) {
+ for (const auto & v : sch["oneOf"]) {
+ if (allows_string(v)) return true;
+ }
+ }
+ return false;
+ };
+
+ if (allows_string(schema_local)) {
+ // For string-accepting schemas, keep freeform XML text (no JSON quoting)
+ param_rule += not_parameter_end;
+ } else {
+ // For non-strings (object/array/number/boolean/null), expect JSON per schema
+ param_rule += builder.add_schema(name + "-parameter-" + param_name, schema_local);
+ }
+
+ param_rule += "\"\" space";
+
+ // Parameter is optional
+ if (required.find(param_name) == required.end()) {
+ param_rule = "(" + param_rule + ")? ";
+ }
+
+ param_rules.push_back(param_rule);
+ }
+ }
+
+ std::string function_content = param_rules.empty() ? "space" : string_join(param_rules, " ");
+ tool_rules.push_back(builder.add_rule(name + "-call",
+ "\"\" space \"\" space " +
+ function_content + " \"\" space \"\" space"));
+ });
+
+ auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
+ builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
+
+ data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, ""});
+ data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "",
+ "",
+ "",
+ "",
+ };
+ } else {
+ // When no tools are provided, disable lazy grammar to avoid "no triggers set" error
+ data.grammar_lazy = false;
+ }
+
+ data.prompt = apply(tmpl, inputs);
+ return data;
+}
+
+// Parser entry for COMMON_CHAT_FORMAT_QWEN3_CODER_XML: consume the rest of the input and try to read it as a
+// Qwen3-Coder XML tool call; text that does not yield a tool call is emitted as plain content.
+static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
+    if (!builder.syntax().parse_tool_calls) {
+        builder.add_content(builder.consume_rest());
+        return;
+    }
+
+    std::string content = builder.consume_rest();
+
+    // No tool list is available here, so function names and parameter types are not checked against a schema
+    std::vector<common_chat_tool> empty_tools;
+    if (builder.parse_qwen3_xml_tool_call(content, empty_tools)) {
+        return;
+    }
+    // NOTE(review): confirm the parser adds no content before returning false, or this call repeats that text
+    builder.add_content(content);
+}
+
static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
// Parse thinking tags first - this handles the main reasoning content
builder.try_parse_reasoning("", "");
@@ -2644,6 +2829,15 @@ static common_chat_params common_chat_templates_apply_jinja(
return common_chat_params_init_command_r7b(tmpl, params);
}
+ // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
+ // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
+ // Require presence of , , and blocks.
+ if (src.find("") != std::string::npos &&
+ src.find("") != std::string::npos) {
return common_chat_params_init_granite(tmpl, params);
@@ -2712,6 +2906,7 @@ static common_chat_params common_chat_templates_apply_jinja(
return common_chat_params_init_mistral_nemo(tmpl, params);
}
+
// Generic fallback
return common_chat_params_init_generic(tmpl, params);
}
@@ -2844,6 +3039,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
case COMMON_CHAT_FORMAT_APERTUS:
common_chat_parse_apertus(builder);
break;
+ case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
+ common_chat_parse_qwen3_coder_xml(builder);
+ break;
default:
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
}
diff --git a/common/chat.h b/common/chat.h
index 3c277e15eba7f..69924c00dcd18 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -110,6 +110,7 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
COMMON_CHAT_FORMAT_HERMES_2_PRO,
COMMON_CHAT_FORMAT_COMMAND_R7B,
+ COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
COMMON_CHAT_FORMAT_GRANITE,
COMMON_CHAT_FORMAT_GPT_OSS,
COMMON_CHAT_FORMAT_SEED_OSS,
diff --git a/models/templates/Qwen3-Coder.jinja b/models/templates/Qwen3-Coder.jinja
new file mode 100644
index 0000000000000..49b0e8d0ee7e6
--- /dev/null
+++ b/models/templates/Qwen3-Coder.jinja
@@ -0,0 +1,117 @@
+{#- render_extra_keys: emits every key of json_dict that is not in handled_keys as one XML-style element on its own line; mappings and non-string sequences are JSON-encoded via tojson, everything else is stringified. -#}{% macro render_extra_keys(json_dict, handled_keys) %}
+ {%- if json_dict is mapping %}
+ {%- for json_key in json_dict if json_key not in handled_keys %}
+ {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
+ {{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
+ {%- else %}
+ {{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+{% endmacro %}
+
+{#- Split off a leading system message; loop_messages holds the turns rendered afterwards. -#}{%- if messages[0]["role"] == "system" %}
+ {%- set system_message = messages[0]["content"] %}
+ {%- set loop_messages = messages[1:] %}
+{%- else %}
+ {%- set loop_messages = messages %}
+{%- endif %}
+
+{%- if not tools is defined %}
+ {%- set tools = [] %}
+{%- endif %}
+
+{%- if system_message is defined %}
+ {{- "<|im_start|>system\n" + system_message }}
+{%- else %}
+ {%- if tools is iterable and tools | length > 0 %}
+ {{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
+ {%- endif %}
+{%- endif %}
+{#- List every tool as an XML function description, then spell out the reply format the model must use for calls. -#}{%- if tools is iterable and tools | length > 0 %}
+ {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
+ {{- "<tools>" }}
+ {%- for tool in tools %}
+ {%- if tool.function is defined %}
+ {%- set tool = tool.function %}
+ {%- endif %}
+ {{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
+ {%- if tool.description is defined %}
+ {{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
+ {%- endif %}
+ {{- '\n<parameters>' }}
+ {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
+ {%- for param_name, param_fields in tool.parameters.properties|items %}
+ {{- '\n<parameter>' }}
+ {{- '\n<name>' ~ param_name ~ '</name>' }}
+ {%- if param_fields.type is defined %}
+ {{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
+ {%- endif %}
+ {%- if param_fields.description is defined %}
+ {{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
+ {%- endif %}
+ {%- set handled_keys = ['name', 'type', 'description'] %}
+ {{- render_extra_keys(param_fields, handled_keys) }}
+ {{- '\n</parameter>' }}
+ {%- endfor %}
+ {%- endif %}
+ {% set handled_keys = ['type', 'properties'] %}
+ {{- render_extra_keys(tool.parameters, handled_keys) }}
+ {{- '\n</parameters>' }}
+ {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
+ {{- render_extra_keys(tool, handled_keys) }}
+ {{- '\n</function>' }}
+ {%- endfor %}
+ {{- "\n</tools>" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+{%- endif %}
+{%- if system_message is defined %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if tools is iterable and tools | length > 0 %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{#- Render the conversation: assistant tool calls become XML call blocks, and consecutive tool results are grouped into a single user turn. -#}{%- for message in loop_messages %}
+ {%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
+ {{- '<|im_start|>' + message.role }}
+ {%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
+ {{- '\n' + message.content | trim + '\n' }}
+ {%- endif %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '<parameter=' + args_name + '>\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n</parameter>\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '</function>\n</tool_call>' }}
+ {%- endfor %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user\n' }}
+ {%- endif %}
+ {{- '<tool_response>\n' }}
+ {{- message.content }}
+ {{- '\n</tool_response>\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+{%- endif %}
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 9cd67e3ef49d3..2182700b2c5c4 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1388,6 +1388,675 @@ static void test_template_output_parsers() {
"{\"arg1\": 1}\n"
"```<|tool▁call▁end|><|tool▁calls▁end|>");
}
+
+ // Test Qwen3-Coder XML format - Comprehensive test suite
+ {
+ printf("Testing Qwen3-Coder XML format - Comprehensive Suite\n");
+
+ // Test 1: Basic XML tool call parsing
+ assert_msg_equals(
+ message_assist_call,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " 1\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 2: Multiple parameters with different types
+ common_chat_msg expected_multi_param;
+ expected_multi_param.role = "assistant";
+ expected_multi_param.tool_calls = {
+ { "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}", "" }
+ };
+
+ assert_msg_equals(
+ expected_multi_param,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " John Doe\n"
+ " \n"
+ " \n"
+ " 30\n"
+ " \n"
+ " \n"
+ " true\n"
+ " \n"
+ " \n"
+ " 95.5\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 3: Special characters and Unicode
+ common_chat_msg expected_special_chars;
+ expected_special_chars.role = "assistant";
+ expected_special_chars.tool_calls = {
+ { "unicode_function", "{\"message\":\"Hello 世界! 🌍 Special chars: @#$%^&*()\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_special_chars,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " Hello 世界! 🌍 Special chars: @#$%^&*()\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 4: Multiline content with newlines and indentation
+ common_chat_msg expected_multiline;
+ expected_multiline.role = "assistant";
+ expected_multiline.tool_calls = {
+ { "code_function", "{\"code\":\"def hello():\\n print(\\\"Hello, World!\\\")\\n return True\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_multiline,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ "def hello():\n"
+ " print(\"Hello, World!\")\n"
+ " return True\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 5: JSON object as parameter value
+ common_chat_msg expected_json_param;
+ expected_json_param.role = "assistant";
+ expected_json_param.tool_calls = {
+ { "json_function", "{\"config\":{\"host\":\"localhost\",\"port\":8080,\"ssl\":false}}", "" }
+ };
+
+ assert_msg_equals(
+ expected_json_param,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " {\"host\": \"localhost\", \"port\": 8080, \"ssl\": false}\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 6: Array as parameter value
+ common_chat_msg expected_array_param;
+ expected_array_param.role = "assistant";
+ expected_array_param.tool_calls = {
+ { "array_function", "{\"items\":[\"apple\",\"banana\",\"cherry\"]}", "" }
+ };
+
+ assert_msg_equals(
+ expected_array_param,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " [\"apple\", \"banana\", \"cherry\"]\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 7: Empty parameter
+ common_chat_msg expected_empty_param;
+ expected_empty_param.role = "assistant";
+ expected_empty_param.tool_calls = {
+ { "empty_function", "{\"empty_param\":\"\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_empty_param,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 8: Boolean values (true/false)
+ common_chat_msg expected_boolean;
+ expected_boolean.role = "assistant";
+ expected_boolean.tool_calls = {
+ { "boolean_function", "{\"enabled\":true,\"debug\":false}", "" }
+ };
+
+ assert_msg_equals(
+ expected_boolean,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " true\n"
+ " \n"
+ " \n"
+ " false\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 9: Null value
+ common_chat_msg expected_null;
+ expected_null.role = "assistant";
+ expected_null.tool_calls = {
+ { "null_function", "{\"optional_param\":null}", "" }
+ };
+
+ assert_msg_equals(
+ expected_null,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " null\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 10: Negative numbers and scientific notation
+ common_chat_msg expected_numbers;
+ expected_numbers.role = "assistant";
+ expected_numbers.tool_calls = {
+ { "math_function", "{\"negative\":-42,\"decimal\":-3.14,\"scientific\":1.23e-4}", "" }
+ };
+
+ assert_msg_equals(
+ expected_numbers,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " -42\n"
+ " \n"
+ " \n"
+ " -3.14\n"
+ " \n"
+ " \n"
+ " 1.23e-4\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 11: XML-like content in parameters (should be escaped)
+ common_chat_msg expected_xml_content;
+ expected_xml_content.role = "assistant";
+ expected_xml_content.tool_calls = {
+ { "xml_function", "{\"xml_content\":\"- value
\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_xml_content,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " - value
\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 12: Quotes and escape characters
+ common_chat_msg expected_quotes;
+ expected_quotes.role = "assistant";
+ expected_quotes.tool_calls = {
+ { "quote_function", "{\"message\":\"She said \\\"Hello!\\\" and left.\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_quotes,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " She said \"Hello!\" and left.\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 13: Long parameter value (simplified)
+ std::string long_text = "This is a long text parameter that should test the parser's ability to handle larger amounts of text data.";
+
+ common_chat_msg expected_long_text;
+ expected_long_text.role = "assistant";
+ expected_long_text.tool_calls = {
+ { "long_function", "{\"long_text\":\"" + long_text + "\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_long_text,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " " + long_text + "\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 14: Mixed content with text before and after tool call
+ common_chat_msg expected_mixed_content;
+ expected_mixed_content.role = "assistant";
+ expected_mixed_content.content = "I'll help you search for products. ";
+ expected_mixed_content.tool_calls = {
+ { "search_function", "{\"query\":\"laptops\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_mixed_content,
+ common_chat_parse(
+ "I'll help you search for products. \n"
+ " \n"
+ " \n"
+ " laptops\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 15: Compact format (no extra whitespace)
+ common_chat_msg expected_compact;
+ expected_compact.role = "assistant";
+ expected_compact.tool_calls = {
+ { "compact_function", "{\"param\":\"value\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_compact,
+ common_chat_parse(
+ "value",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 16: Function name with underscores and numbers
+ common_chat_msg expected_complex_name;
+ expected_complex_name.role = "assistant";
+ expected_complex_name.tool_calls = {
+ { "get_user_data_v2", "{\"user_id\":12345}", "" }
+ };
+
+ assert_msg_equals(
+ expected_complex_name,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " 12345\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 17: Parameter names with underscores and numbers
+ common_chat_msg expected_complex_params;
+ expected_complex_params.role = "assistant";
+ expected_complex_params.tool_calls = {
+ { "test_function", "{\"param_1\":\"value1\",\"param_2_name\":\"value2\",\"param3\":123}", "" }
+ };
+
+ assert_msg_equals(
+ expected_complex_params,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " value1\n"
+ " \n"
+ " \n"
+ " value2\n"
+ " \n"
+ " \n"
+ " 123\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ printf("✅ All Qwen3-Coder XML format tests passed!\n");
+ }
+
+ // Test Qwen3-Coder XML format - Error handling and edge cases
+ {
+ printf("Testing Qwen3-Coder XML format - Error handling and edge cases\n");
+
+ // Test 1: No tool_call tags (should be treated as regular content)
+ common_chat_msg expected_no_tool_call;
+ expected_no_tool_call.role = "assistant";
+ expected_no_tool_call.content = "This is just regular text without any tool calls.";
+
+ assert_msg_equals(
+ expected_no_tool_call,
+ common_chat_parse(
+ "This is just regular text without any tool calls.",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 2: Empty function name (should fall back to content)
+ common_chat_msg expected_empty_function;
+ expected_empty_function.role = "assistant";
+ expected_empty_function.content = "";
+
+ assert_msg_equals(
+ expected_empty_function,
+ common_chat_parse(
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 3: Malformed parameter tags (should still parse function but ignore malformed params)
+ common_chat_msg expected_malformed_params;
+ expected_malformed_params.role = "assistant";
+ expected_malformed_params.tool_calls = {
+ { "test", "{}", "" } // Empty arguments since parameter is malformed
+ };
+
+ assert_msg_equals(
+ expected_malformed_params,
+ common_chat_parse(
+ "no name",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 4: Nested tool calls (should parse the first one)
+ common_chat_msg expected_nested;
+ expected_nested.role = "assistant";
+ expected_nested.tool_calls = {
+ { "outer_function", "{\"param\":\"value\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_nested,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " value\n"
+ " \n"
+ " \n"
+ "\n"
+ "\n"
+ " \n"
+ " \n"
+ " value2\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 5: Very deeply nested XML content in parameter
+ common_chat_msg expected_deep_xml;
+ expected_deep_xml.role = "assistant";
+ expected_deep_xml.tool_calls = {
+ { "xml_parser", "{\"xml\":\"deep content\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_deep_xml,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " deep content\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 6: Parameter with only whitespace
+ common_chat_msg expected_whitespace_param;
+ expected_whitespace_param.role = "assistant";
+ expected_whitespace_param.tool_calls = {
+ { "whitespace_function", "{\"spaces\":\"\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_whitespace_param,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " \n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 7: Parameter with tabs and mixed whitespace
+ common_chat_msg expected_mixed_whitespace;
+ expected_mixed_whitespace.role = "assistant";
+ expected_mixed_whitespace.tool_calls = {
+ { "tab_function", "{\"content\":\"line1\\n\\tindented line\\n spaces\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_mixed_whitespace,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ "line1\n"
+ "\tindented line\n"
+ " spaces\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 8: Control characters and special Unicode
+ common_chat_msg expected_control_chars;
+ expected_control_chars.role = "assistant";
+ expected_control_chars.tool_calls = {
+ { "control_function", "{\"text\":\"Line1\\nLine2\\tTabbed\\rCarriage return\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_control_chars,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ "Line1\nLine2\tTabbed\rCarriage return\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 9: Emoji and extended Unicode characters
+ common_chat_msg expected_emoji;
+ expected_emoji.role = "assistant";
+ expected_emoji.tool_calls = {
+ { "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_emoji,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 10: Mathematical expressions and formulas
+ common_chat_msg expected_math;
+ expected_math.role = "assistant";
+ expected_math.tool_calls = {
+ { "math_function", "{\"formula\":\"E = mc² and ∫f(x)dx = F(x) + C\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_math,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " E = mc² and ∫f(x)dx = F(x) + C\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 11: SQL injection-like content (should be safely escaped)
+ common_chat_msg expected_sql;
+ expected_sql.role = "assistant";
+ expected_sql.tool_calls = {
+ { "sql_function", "{\"query\":\"SELECT * FROM users WHERE id = 1; DROP TABLE users; --\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_sql,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " SELECT * FROM users WHERE id = 1; DROP TABLE users; --\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 12: HTML/XML injection content
+ common_chat_msg expected_html;
+ expected_html.role = "assistant";
+ expected_html.tool_calls = {
+ { "html_function", "{\"content\":\"
\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_html,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ "
\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 13: Binary-like content (base64)
+ common_chat_msg expected_binary;
+ expected_binary.role = "assistant";
+ expected_binary.tool_calls = {
+ { "binary_function", "{\"data\":\"SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\"}", "" }
+ };
+
+ assert_msg_equals(
+ expected_binary,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ // Test 14: Very large numbers (should be parsed as scientific notation)
+ common_chat_msg expected_large_numbers;
+ expected_large_numbers.role = "assistant";
+ expected_large_numbers.tool_calls = {
+ { "number_function", "{\"big_int\":1e+60}", "" } // Large number becomes scientific notation
+ };
+
+ assert_msg_equals(
+ expected_large_numbers,
+ common_chat_parse(
+ "\n"
+ " \n"
+ " \n"
+ " 999999999999999999999999999999999999999999999999999999999999\n"
+ " \n"
+ " \n"
+ "",
+ /* is_partial= */ false,
+ {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
+
+ printf("✅ All Qwen3-Coder XML error handling and edge case tests passed!\n");
+ }
+ {
+ // Qwen3-Coder template: ensure grammar builds with union types and unsupported {"not": {}} branches
+ auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
+ common_chat_templates_inputs inputs;
+ inputs.messages = { message_user };
+
+ common_chat_tool qwen_union_tool {
+ /* .name = */ "qwen_union",
+ /* .description = */ "Test tool for union/anyOf handling",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "priority": { "type": ["number", "null"] },
+ "maybe_text": { "anyOf": [ { "not": {} }, { "type": "string" } ] },
+ "config": { "anyOf": [ { "type": "object" }, { "type": "null" } ] }
+ },
+ "required": []
+ })",
+ };
+ inputs.tools = { qwen_union_tool };
+
+ auto params = common_chat_templates_apply(tmpls.get(), inputs);
+ assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, params.format);
+ assert_equals(false, params.grammar.empty());
+
+ // Grammar should compile successfully
+ auto grammar = build_grammar(params.grammar);
+ if (!grammar) {
+ throw std::runtime_error("Failed to build Qwen3-Coder grammar with union types");
+ }
+ }
+
{
auto tmpls = read_templates("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja");
std::vector end_tokens{ "<|end_of_text|>" };
@@ -2129,6 +2798,7 @@ static void test_template_output_parsers() {
}
+
static void test_msg_diffs_compute() {
printf("[%s]\n", __func__);
{