From de4343aab626fb94d39d876057c70a7d384d0643 Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Sun, 2 Nov 2025 07:12:15 -0300 Subject: [PATCH 1/2] chat: Allow reasoning_content to be passed back This makes it possible for reasoning_content to be passed back to llama-server, which is useful for LLMs like GPT-OSS or Minimax-M2 that were trained for this. --- common/chat.cpp | 5 +++-- tools/server/utils.hpp | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 63583fb22489d..06dfb05e11c41 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -201,6 +201,7 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa msg.role = message.at("role"); auto has_content = message.contains("content"); + auto has_reasoning_content = message.contains("reasoning_content"); auto has_tool_calls = message.contains("tool_calls"); if (has_content) { const auto & content = message.at("content"); @@ -249,8 +250,8 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa msg.tool_calls.push_back(tc); } } - if (!has_content && !has_tool_calls) { - throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)"); + if (!has_content && !has_tool_calls && !has_reasoning_content) { + throw std::runtime_error("Expected 'content', 'reasoning_content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)"); } if (message.contains("reasoning_content")) { msg.reasoning_content = message.at("reasoning_content"); diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index b6198edfc487c..29b9c0cb208d4 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -595,8 +595,8 @@ static json oaicompat_chat_params_parse( throw std::runtime_error("All non-assistant messages must contain 'content'"); } if (role == "assistant") { - if (!msg.contains("content") && !msg.contains("tool_calls")) { - throw std::runtime_error("Assistant message must contain either 'content' or 'tool_calls'!"); + if (!msg.contains("content") && !msg.contains("tool_calls") && !msg.contains("reasoning_content")) { + throw std::runtime_error("Assistant message must contain either 'content' or 'tool_calls' or 'reasoning_content'!"); } if (!msg.contains("content")) { continue; // avoid errors with no content From 48237c2f93f1a3c8a1adf2e0eeea2b53ccf93cdb Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Sun, 2 Nov 2025 21:12:26 -0300 Subject: [PATCH 2/2] Add test for checking if reasoning_content is accepted --- tools/server/tests/unit/test_chat_completion.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py index 392e0efecdbbd..8c7b6ef9f835a 100644 --- a/tools/server/tests/unit/test_chat_completion.py +++ b/tools/server/tests/unit/test_chat_completion.py @@ -476,3 +476,16 @@ def make_cmpl_request(): assert last_progress["total"] > 0 assert last_progress["processed"] == last_progress["total"] assert total_batch_count == batch_count + + +def test_standalone_reasoning_content_is_accepted(): + global server + server.start() + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": 8, + "messages": [ + {"role": "user", "content": "How much is 102 + 7?"}, + {"role": "assistant", "reasoning_content": "Calculate."}, + ] + }) + assert res.status_code == 200