From e74773ec434fea97b5c365bb9781df3b1fa6c494 Mon Sep 17 00:00:00 2001 From: arch-btw <57669023+arch-btw@users.noreply.github.com> Date: Wed, 23 Oct 2024 04:12:53 -0700 Subject: [PATCH 01/10] Add granite template to llama.cpp --- src/llama.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index 24e1f1f01a857..92865a0fa8ceb 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -21713,7 +21713,17 @@ static int32_t llama_chat_apply_template_internal( ss << message->content << "\n\n"; } } - } else { + } else if (tmpl == "granite" || tmpl_contains("<|start_of_role|>")) { + // IBM Granite template + for (const auto& message : chat) { + std::string role(message->role); + ss << "<|start_of_role|>" << role << "<|end_of_role|>" << "\n" + << message->content << "<|end_of_text|>\n"; + } + if (add_ass) { + ss << "<|start_of_role|>assistant<|end_of_role|>\n"; + } +} else { // template not supported return -1; } From a6679d94bc77b6c470d724c04bae878219f54a02 Mon Sep 17 00:00:00 2001 From: arch-btw <57669023+arch-btw@users.noreply.github.com> Date: Wed, 23 Oct 2024 04:16:42 -0700 Subject: [PATCH 02/10] Add granite template to test-chat-template.cpp --- tests/test-chat-template.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index 6f046249fa1a8..2eb145e9b036e 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -65,6 +65,8 @@ int main(void) { u8"{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + ''}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}", // DeepSeek-V2 "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}", + // ibm-granite/granite-3.0-8b-instruct + "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>", }; std::vector expected_output = { // teknium/OpenHermes-2.5-Mistral-7B From 26f0911aee4adb529c92c544288e4d5192df8e9a Mon Sep 17 00:00:00 2001 From: arch-btw <57669023+arch-btw@users.noreply.github.com> Date: Sat, 26 Oct 2024 04:56:40 -0700 Subject: [PATCH 03/10] Update src/llama.cpp Co-authored-by: Xuan Son Nguyen --- src/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index 92865a0fa8ceb..255433982fa8c 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -21717,7 +21717,7 @@ static int32_t llama_chat_apply_template_internal( // IBM Granite template for (const auto& message : chat) { std::string role(message->role); - ss << "<|start_of_role|>" << role << "<|end_of_role|>" << "\n" + ss << "<|start_of_role|>" << role << "<|end_of_role|>\n" << message->content << "<|end_of_text|>\n"; } if (add_ass) { From 8fe174dce0c596020ce7004fb8d0ed17a6472a5c Mon Sep 17 00:00:00 2001 From: arch-btw <57669023+arch-btw@users.noreply.github.com> Date: Sat, 26 Oct 2024 05:21:17 -0700 Subject: [PATCH 04/10] Update tests/test-chat-template.cpp Co-authored-by: Xuan Son Nguyen --- tests/test-chat-template.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index 2eb145e9b036e..10aaccbd942a7 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -66,7 +66,7 @@ int main(void) { // DeepSeek-V2 "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}", // ibm-granite/granite-3.0-8b-instruct - "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>", + "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>\n", }; std::vector expected_output = { // teknium/OpenHermes-2.5-Mistral-7B From 3cca307ea69c8ace4e4129f6cfa4d9a31de98bbc Mon Sep 17 00:00:00 2001 From: arch-btw <57669023+arch-btw@users.noreply.github.com> Date: Sat, 26 Oct 2024 18:03:17 -0700 Subject: [PATCH 05/10] Added proper template and expected output --- tests/test-chat-template.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index 10aaccbd942a7..03e897e66dca4 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -66,7 +66,7 @@ int main(void) { // DeepSeek-V2 "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}", // ibm-granite/granite-3.0-8b-instruct - "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>\n", + "{%- if tools %}\n {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n {%- for tool in tools %}\n {{- tool | tojson(indent=4) }}\n {%- if not loop.last %}\n {{- '\n\n' }}\n {%- endif %}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {{- '<|start_of_role|>system<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'user' %}\n {{- '<|start_of_role|>user<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'assistant' %}\n {{- '<|start_of_role|>assistant<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'assistant_tool_call' %}\n {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'tool_response' %}\n {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- endif %}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|start_of_role|>assistant<|end_of_role|>' }}\n {%- endif %}\n{%- endfor %}", }; std::vector expected_output = { // teknium/OpenHermes-2.5-Mistral-7B @@ -111,6 +111,8 @@ int main(void) { u8"You are a helpful assistant<用户>HelloHi there<用户>Who are youI am an assistant<用户>Another question", // DeepSeek-V2 u8"You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<|end▁of▁sentence|>User: Who are you\n\nAssistant: I am an assistant <|end▁of▁sentence|>User: Another question\n\nAssistant:", + // ibm-granite/granite-3.0-8b-instruct + "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>\n", }; std::vector formatted_chat(1024); int32_t res; From 60ed87077dc0fac70a15f8a2bbd2d4fe677d2a1a Mon Sep 17 00:00:00 2001 From: arch-btw <57669023+arch-btw@users.noreply.github.com> Date: Sat, 26 Oct 2024 18:09:23 -0700 Subject: [PATCH 06/10] Small change to \n Small change to \n --- src/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index 255433982fa8c..68c23676218f3 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -21717,7 +21717,7 @@ static int32_t llama_chat_apply_template_internal( // IBM Granite template for (const auto& message : chat) { std::string role(message->role); - ss << "<|start_of_role|>" << role << "<|end_of_role|>\n" + ss << "<|start_of_role|>" << role << "<|end_of_role|>" << message->content << "<|end_of_text|>\n"; } if (add_ass) { From 50ef6ca3b950c383679e22cbfac0a0186204443f Mon Sep 17 00:00:00 2001 From: arch-btw <57669023+arch-btw@users.noreply.github.com> Date: Mon, 28 Oct 2024 04:20:02 -0700 Subject: [PATCH 07/10] Add code space & Co-authored-by: Xuan Son Nguyen --- src/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index 68c23676218f3..fe5c9e5cca84f 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -21715,7 +21715,7 @@ static int32_t llama_chat_apply_template_internal( } } else if (tmpl == "granite" || tmpl_contains("<|start_of_role|>")) { // IBM Granite template - for (const auto& message : chat) { + for (const auto & message : chat) { std::string role(message->role); ss << "<|start_of_role|>" << role << "<|end_of_role|>" << message->content << "<|end_of_text|>\n"; From 839cf4ccabe84322d99c6e1a9800456a16498df8 Mon Sep 17 00:00:00 2001 From: arch-btw <57669023+arch-btw@users.noreply.github.com> Date: Mon, 28 Oct 2024 04:32:20 -0700 Subject: [PATCH 08/10] Fix spacing --- src/llama.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/llama.cpp b/src/llama.cpp index fe5c9e5cca84f..d85e1f12db398 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -21714,16 +21714,16 @@ static int32_t llama_chat_apply_template_internal( } } } else if (tmpl == "granite" || tmpl_contains("<|start_of_role|>")) { - // IBM Granite template - for (const auto & message : chat) { - std::string role(message->role); - ss << "<|start_of_role|>" << role << "<|end_of_role|>" - << message->content << "<|end_of_text|>\n"; - } - if (add_ass) { - ss << "<|start_of_role|>assistant<|end_of_role|>\n"; - } -} else { + // IBM Granite template + for (const auto & message: chat) { + std::string role(message -> role); + ss << "<|start_of_role|>" << role << "<|end_of_role|>" + << message -> content << "<|end_of_text|>\n"; + } + if (add_ass) { + ss << "<|start_of_role|>assistant<|end_of_role|>\n"; + } + } else { // template not supported return -1; } From 8b0b64bb759229493cea1f82329da4294a035d17 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 28 Oct 2024 16:09:26 +0100 Subject: [PATCH 09/10] Apply suggestions from code review --- src/llama.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llama.cpp b/src/llama.cpp index d85e1f12db398..4716a2eee7063 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -21716,9 +21716,9 @@ static int32_t llama_chat_apply_template_internal( } else if (tmpl == "granite" || tmpl_contains("<|start_of_role|>")) { // IBM Granite template for (const auto & message: chat) { - std::string role(message -> role); + std::string role(message->role); ss << "<|start_of_role|>" << role << "<|end_of_role|>" - << message -> content << "<|end_of_text|>\n"; + << message->content << "<|end_of_text|>\n"; } if (add_ass) { ss << "<|start_of_role|>assistant<|end_of_role|>\n"; From 337338813bf1983092f74edda51e0ddbaa5ad143 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 28 Oct 2024 16:10:25 +0100 Subject: [PATCH 10/10] Update src/llama.cpp --- src/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index 4716a2eee7063..d7af1b9ea24df 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -21715,7 +21715,7 @@ static int32_t llama_chat_apply_template_internal( } } else if (tmpl == "granite" || tmpl_contains("<|start_of_role|>")) { // IBM Granite template - for (const auto & message: chat) { + for (const auto & message : chat) { std::string role(message->role); ss << "<|start_of_role|>" << role << "<|end_of_role|>" << message->content << "<|end_of_text|>\n";