Skip to content
This repository was archived by the owner on Sep 23, 2025. It is now read-only.

Commit c43a192

Browse files
committed
update
Signed-off-by: minmingzhu <[email protected]>
1 parent 0057c48 commit c43a192

File tree

2 files changed

+43
-36
lines changed

2 files changed

+43
-36
lines changed

llm_on_ray/common/dataprocesser/general_processer.py

Lines changed: 23 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -132,15 +132,19 @@ def create_data(self, examples):
132132
)
133133
else:
134134
new_messages = [
135+
{
136+
"role": "system",
137+
"content": INTRO_BLURB + "\n",
138+
},
135139
{
136140
"role": "user",
137141
"content": examples["instruction"]
138-
+ "\n\n"
142+
+ "\n"
139143
+ INPUT_KEY
140144
+ examples["context"]
141-
+ "\n\n",
145+
+ "\n",
142146
},
143-
{"role": "assistant", "content": examples["response"] + "\n\n"},
147+
{"role": "assistant", "content": examples["response"] + "\n"},
144148
]
145149

146150
return new_messages
@@ -162,7 +166,6 @@ def tokenize_func(self, tokenizer, message):
162166
message,
163167
tokenize=False,
164168
)
165-
print(new_tokenizer)
166169
return tokenizer(
167170
new_tokenizer, add_special_tokens=False, max_length=self.config.get("max_length")
168171
)
@@ -243,21 +246,9 @@ def group_texts(examples):
243246

244247

245248
class SlimOrcaDataPreprocess(ChatDataPreprocess):
246-
chat_template = (
247-
"{% for message in messages %}"
248-
"{% if message['role'] == 'system' %}"
249-
"{{ '### System: ' + message['content'] }}"
250-
"{% elif message['role'] == 'user' %}"
251-
"{{ '### User: ' + message['content'] }}"
252-
"{% elif message['role'] == 'assistant' %}"
253-
"{{ '### Assistant: ' + message['content'] }}"
254-
"{% endif %}"
255-
"{% endfor %}"
256-
)
257249

258250
def __init__(self, config):
259251
super().__init__(config)
260-
self.config["chat_template"] = self.chat_template
261252
self.default_system = "You are a helpful, respectful and honest assistant."
262253

263254
def create_data(self, data):
@@ -278,22 +269,26 @@ def create_data(self, data):
278269
examples[conv[j]["from"]] = conv[j]["value"]
279270
examples[conv[j + 1]["from"]] = conv[j + 1]["value"]
280271

281-
new_messages = [
282-
{"role": "system", "content": examples["system"] + "\n"},
283-
{
284-
"role": "user",
285-
"content": examples["human"] + "\n",
286-
},
287-
{"role": "assistant", "content": examples["gpt"] + "\n"},
288-
]
289272
if self.config.get("gpt_base_model"):
290273
if examples["human"]:
291-
return SLIMORCA_PROMPT_DICT["prompt_with_input"].format(
292-
system=examples["system"], user=examples["human"], gpt=examples["gpt"]
274+
return PROMPT_WITH_INPUT_FORMAT.format(
275+
instruction=examples["system"], response=examples["gpt"], input=examples["human"]
293276
)
294277
else:
295-
return SLIMORCA_PROMPT_DICT["prompt_with_input"].format(
296-
system=examples["human"], gpt=examples["gpt"]
278+
return PROMPT_NO_INPUT_FORMAT.format(
279+
instruction=examples["system"], response=examples["gpt"]
297280
)
298281
else:
282+
new_messages = [
283+
{"role": "system", "content": INTRO_BLURB + "\n"},
284+
{
285+
"role": "user",
286+
"content": examples["system"]
287+
+ "\n"
288+
+ INPUT_KEY
289+
+ examples["human"]
290+
+ "\n",
291+
},
292+
{"role": "assistant", "content": examples["gpt"] + "\n"},
293+
]
299294
return new_messages

tests/finetune/test_chat_template.py

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -27,14 +27,26 @@ def setUp(self):
2727
"gpt_base_model": True,
2828
"max_length": 512,
2929
"trust_remote_code": False,
30-
"chat_template": "Below is an instruction that describes a task. Write a response that appropriately "
31-
"completes the request\n {% if messages[0]['role'] == 'system' %}{{ raise_exception("
32-
"'System role not supported') }}{% endif %}{% for message in messages %}{% if (message["
33-
"'role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles "
34-
"must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] "
35-
"== 'user' %}{{ '### Instruction: ' + message['content'] }}{% elif message['role'] == "
36-
"'assistant' %}{{ '### Response: ' + message['content'] }}{% endif %}{% endfor %}{{'### "
37-
"End \n'}}",
30+
"chat_template": "{% if messages[0]['role'] == 'system' %}"
31+
"{% set loop_messages = messages[1:] %}"
32+
"{% set system_message = messages[0]['content'] %}"
33+
"{% else %}"
34+
"{% set loop_messages = messages %}"
35+
"{% set system_message = false %}"
36+
"{% endif %}"
37+
"{% for message in loop_messages %}"
38+
"{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
39+
"{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
40+
"{% endif %}"
41+
"{% if loop.index0 == 0 and system_message %}"
42+
"{{ system_message }}"
43+
"{% endif %}"
44+
"{% if message['role'] == 'user' %}"
45+
"{{ '### Instruction: ' + message['content'] + eos_token }}"
46+
"{% elif message['role'] == 'assistant' %}"
47+
"{{ '### Response:' + message['content'] + eos_token }}"
48+
"{% endif %}{% endfor %}"
49+
"{{'### End \n'}}",
3850
}
3951
self.processer = ChatDataPreprocess(self.config)
4052

0 commit comments

Comments
 (0)