From 59c81ee95cd4e17fe679df20fa8e95393377cf64 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Tue, 11 Jul 2023 13:11:00 -0700
Subject: [PATCH 1/5] Add first file for pythainlp.chat and decoder model
 (generate)

---
 pythainlp/chat/__init__.py          | 22 ++++++++++++++++++++++
 pythainlp/generate/decoder_model.py | 15 +++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 pythainlp/chat/__init__.py
 create mode 100644 pythainlp/generate/decoder_model.py

diff --git a/pythainlp/chat/__init__.py b/pythainlp/chat/__init__.py
new file mode 100644
index 000000000..6400a04f4
--- /dev/null
+++ b/pythainlp/chat/__init__.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2016-2023 PyThaiNLP Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+pythainlp.chat
+
+WIP
+"""
+
+__all__ = []
+
diff --git a/pythainlp/generate/decoder_model.py b/pythainlp/generate/decoder_model.py
new file mode 100644
index 000000000..d03ba13b7
--- /dev/null
+++ b/pythainlp/generate/decoder_model.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2016-2023 PyThaiNLP Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# WIP
\ No newline at end of file
From ce1eeb2331e89f2bde0f169ffe83c76c638a89ec Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Wed, 12 Jul 2023 23:00:54 -0700
Subject: [PATCH 2/5] Add wangchanglm to pythainlp.generate

---
 .../decoder_model.py => chat/core.py}    |   3 +-
 pythainlp/generate/wangchanglm.py        | 124 ++++++++++++++++++
 2 files changed, 125 insertions(+), 2 deletions(-)
 rename pythainlp/{generate/decoder_model.py => chat/core.py} (93%)
 create mode 100644 pythainlp/generate/wangchanglm.py

diff --git a/pythainlp/generate/decoder_model.py b/pythainlp/chat/core.py
similarity index 93%
rename from pythainlp/generate/decoder_model.py
rename to pythainlp/chat/core.py
index d03ba13b7..be76af521 100644
--- a/pythainlp/generate/decoder_model.py
+++ b/pythainlp/chat/core.py
@@ -11,5 +11,4 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.
-# WIP
\ No newline at end of file
+# limitations under the License.
\ No newline at end of file
diff --git a/pythainlp/generate/wangchanglm.py b/pythainlp/generate/wangchanglm.py
new file mode 100644
index 000000000..7d0c1fc32
--- /dev/null
+++ b/pythainlp/generate/wangchanglm.py
@@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2016-2023 PyThaiNLP Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+import pandas as pd
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+class WangChanGLM:
+    def __init__(self):
+        self.exclude_pattern = re.compile(r'[^ก-๙]+')
+        self.PROMPT_DICT = {
+            "prompt_input": (
+                "<context>: {input}\n<human>: {instruction}\n<bot>: "
+            ),
+            "prompt_no_input": (
+                "<human>: {instruction}\n<bot>: "
+            ),
+        }
+    def is_exclude(self, text):
+        return bool(self.exclude_pattern.search(text))
+    def load_model(
+        self,
+        model_path,
+        return_dict=True,
+        load_in_8bit=False,
+        device_map="auto",
+        torch_dtype=torch.float16,
+        offload_folder="./",
+        low_cpu_mem_usage=True,
+        **kwargs
+    ):
+        self.model_path = model_path
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_path
+            return_dict=return_dict,
+            load_in_8bit=load_in_8bit,
+            device_map=device_map,
+            torch_dtype=torch_dtype,
+            offload_folder=offload_folder,
+            low_cpu_mem_usage=low_cpu_mem_usage,
+            **kwargs
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
+        self.df = pd.DataFrame(self.tokenizer.vocab.items(), columns=['text', 'idx'])
+        self.df['is_exclude'] = self.df.text.map(self.is_exclude)
+        self.exclude_ids = self.df[self.df.is_exclude==True].idx.tolist()
+    def gen_instruct(
+        self,
+        text,
+        max_new_tokens=512,
+        top_p=0.95,
+        temperature=0.9,
+        top_k=50,
+        no_repeat_ngram_size=2,
+        typical_p=1.
+    ):
+        batch = self.tokenizer(text, return_tensors="pt")
+        with torch.cuda.amp.autocast(): # cuda -> cpu if cpu
+            if Thai=="Yes":
+                output_tokens = self.model.generate(
+                    input_ids=batch["input_ids"],
+                    max_new_tokens=max_new_tokens, # 512
+                    begin_suppress_tokens = self.exclude_ids,
+                    no_repeat_ngram_size=no_repeat_ngram_size,
+                    #oasst k50
+                    top_k=top_k,
+                    top_p=top_p, # 0.95
+                    typical_p=typical_p,
+                    temperature=temperature, # 0.9
+                )
+            else:
+                output_tokens = self.model.generate(
+                    input_ids=batch["input_ids"],
+                    max_new_tokens=max_new_tokens, # 512
+                    no_repeat_ngram_size=no_repeat_ngram_size,
+                    #oasst k50
+                    top_k=top_k,
+                    top_p=top_p, # 0.95
+                    typical_p=typical_p,
+                    temperature=temperature, # 0.9
+                )
+        return self.tokenizer.decode(output_tokens[0][len(batch["input_ids"][0]):], skip_special_tokens=True)
+    def instruct_generate(
+        self,
+        instruct: str,
+        context: str = None,
+        max_gen_len=512,
+        temperature: float =0.9,
+        top_p: float = 0.95,
+        top_k=50,
+        no_repeat_ngram_size=2,
+        typical_p=1
+    ):
+        if context == None or context=="":
+            prompt = self.PROMPT_DICT['prompt_no_input'].format_map(
+                {'instruction': instruct, 'input': ''}
+            )
+        else:
+            prompt = self.PROMPT_DICT['prompt_input'].format_map(
+                {'instruction': instruct, 'input': context}
+            )
+        result = self.gen_instruct(
+            prompt,
+            max_gen_len=max_gen_len,
+            top_p=top_p,
+            top_k=top_k,
+            temperature=temperature,
+            no_repeat_ngram_size=no_repeat_ngram_size,
+            typical_p=typical_p
+        )
+        return result
\ No newline at end of file
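The prompt templates above drive everything that follows. Rendered, the no-context template looks like this (illustrative doctest; the `<human>`/`<bot>` markers are the WangChanGLM conversation tokens restored above, which an HTML pass had stripped from this patch):

    >>> from pythainlp.generate.wangchanglm import WangChanGLM
    >>> WangChanGLM().PROMPT_DICT["prompt_no_input"].format_map({"instruction": "ขอวิธีทำข้าวผัดหน่อย", "input": ""})
    '<human>: ขอวิธีทำข้าวผัดหน่อย\n<bot>: '
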
From 22bfdc42599f313791ee8a50e81b2c136deaedfb Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 13 Jul 2023 09:34:42 -0700
Subject: [PATCH 3/5] Update pythainlp.generate.wangchanglm

---
 pythainlp/chat/core.py            | 16 ++++++++++++-
 pythainlp/generate/wangchanglm.py | 38 +++++++++++++++++--------------
 2 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/pythainlp/chat/core.py b/pythainlp/chat/core.py
index be76af521..14c5b9cac 100644
--- a/pythainlp/chat/core.py
+++ b/pythainlp/chat/core.py
@@ -11,4 +11,18 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.
\ No newline at end of file
+# limitations under the License.
+
+class Chat:
+    def __init__(self):
+        pass
+    def load_model(self, model_path,load_in_8bit=False,offload_folder="./",**kwargs):
+        if model_path == "wangchanglm":
+            from pythainlp.generate.wangchanglm import WangChanGLM
+            self.model = WangChanGLM()
+            self.model.load_model(
+                model_path="pythainlp/wangchanglm-7.5B-sft-en-8bit-sharded",
+                load_in_8bit=load_in_8bit,
+                offload_folder=offload_folder,
+                **kwargs
+            )
\ No newline at end of file
diff --git a/pythainlp/generate/wangchanglm.py b/pythainlp/generate/wangchanglm.py
index 7d0c1fc32..b936ec4ef 100644
--- a/pythainlp/generate/wangchanglm.py
+++ b/pythainlp/generate/wangchanglm.py
@@ -33,25 +33,25 @@ def is_exclude(self, text):
         return bool(self.exclude_pattern.search(text))
     def load_model(
         self,
-        model_path,
+        model_path="pythainlp/wangchanglm-7.5B-sft-en-sharded",
         return_dict=True,
         load_in_8bit=False,
-        device_map="auto",
+        device="cuda",
         torch_dtype=torch.float16,
         offload_folder="./",
-        low_cpu_mem_usage=True,
-        **kwargs
+        low_cpu_mem_usage=True
     ):
+        self.device = device
+        self.torch_dtype = torch_dtype
         self.model_path = model_path
         self.model = AutoModelForCausalLM.from_pretrained(
-            self.model_path
+            self.model_path,
             return_dict=return_dict,
             load_in_8bit=load_in_8bit,
-            device_map=device_map,
+            device_map=device,
             torch_dtype=torch_dtype,
             offload_folder=offload_folder,
-            low_cpu_mem_usage=low_cpu_mem_usage,
-            **kwargs
+            low_cpu_mem_usage=low_cpu_mem_usage
         )
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
         self.df = pd.DataFrame(self.tokenizer.vocab.items(), columns=['text', 'idx'])
@@ -65,11 +65,13 @@ def gen_instruct(
         temperature=0.9,
         top_k=50,
         no_repeat_ngram_size=2,
-        typical_p=1.
+        typical_p=1.,
+        thai_only=True,
+        skip_special_tokens=True
     ):
         batch = self.tokenizer(text, return_tensors="pt")
-        with torch.cuda.amp.autocast(): # cuda -> cpu if cpu
-            if Thai=="Yes":
+        with torch.autocast(device_type=self.device, dtype=self.torch_dtype):
+            if thai_only:
                 output_tokens = self.model.generate(
                     input_ids=batch["input_ids"],
                     max_new_tokens=max_new_tokens, # 512
@@ -92,17 +94,18 @@ def gen_instruct(
                     typical_p=typical_p,
                     temperature=temperature, # 0.9
                 )
-        return self.tokenizer.decode(output_tokens[0][len(batch["input_ids"][0]):], skip_special_tokens=True)
+        return self.tokenizer.decode(output_tokens[0][len(batch["input_ids"][0]):], skip_special_tokens=skip_special_tokens)
     def instruct_generate(
         self,
         instruct: str,
         context: str = None,
-        max_gen_len=512,
+        max_new_tokens=512,
         temperature: float =0.9,
         top_p: float = 0.95,
         top_k=50,
         no_repeat_ngram_size=2,
-        typical_p=1
+        typical_p=1,
+        thai_only=True
     ):
         if context == None or context=="":
             prompt = self.PROMPT_DICT['prompt_no_input'].format_map(
@@ -114,11 +117,12 @@ def instruct_generate(
             )
         result = self.gen_instruct(
             prompt,
-            max_gen_len=max_gen_len,
+            max_new_tokens=max_new_tokens,
             top_p=top_p,
             top_k=top_k,
             temperature=temperature,
             no_repeat_ngram_size=no_repeat_ngram_size,
-            typical_p=typical_p
+            typical_p=typical_p,
+            thai_only=thai_only
         )
-        return result
\ No newline at end of file
+        return result,prompt
\ No newline at end of file
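Taken together, patches 2 and 3 make the generator usable end to end. A minimal editorial sketch, not part of the series; device="cpu" with torch.bfloat16 is an assumption so torch.autocast also works without CUDA, and the default checkpoint is downloaded on first use:

    import torch
    from pythainlp.generate.wangchanglm import WangChanGLM

    model = WangChanGLM()
    # load_model() defaults to the "pythainlp/wangchanglm-7.5B-sft-en-sharded" weights.
    model.load_model(device="cpu", torch_dtype=torch.bfloat16)
    # At this point in the series, instruct_generate() returns (answer, prompt);
    # PATCH 4 later changes it to return only the answer.
    answer, prompt = model.instruct_generate(instruct="ขอวิธีทำข้าวผัดหน่อย")
    print(answer)
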
From 770682dc5780730552231fbecafaa6debac66e29 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 13 Jul 2023 22:08:43 -0700
Subject: [PATCH 4/5] Add docs

---
 docs/api/chat.rst                 |    7 +
 docs/api/generate.rst             |    4 +-
 docs/notes/installation.rst       |    1 +
 notebooks/test-chat.ipynb         |  236 +++
 notebooks/test-wangchanglm.ipynb  | 1585 +++++++++++++++++++++++++++++
 pythainlp/chat/__init__.py        |    5 +-
 pythainlp/chat/core.py            |   60 +-
 pythainlp/generate/wangchanglm.py |   96 +-
 setup.py                          |    5 +
 9 files changed, 1964 insertions(+), 35 deletions(-)
 create mode 100644 docs/api/chat.rst
 create mode 100644 notebooks/test-chat.ipynb
 create mode 100644 notebooks/test-wangchanglm.ipynb

diff --git a/docs/api/chat.rst b/docs/api/chat.rst
new file mode 100644
index 000000000..3d0d9d2e2
--- /dev/null
+++ b/docs/api/chat.rst
@@ -0,0 +1,7 @@
+.. currentmodule:: pythainlp.chat
+
+pythainlp.chat
+==============
+
+.. autoclass:: ChatBotModel
+   :members:
\ No newline at end of file
diff --git a/docs/api/generate.rst b/docs/api/generate.rst
index 02459dfc3..910bba27d 100644
--- a/docs/api/generate.rst
+++ b/docs/api/generate.rst
@@ -13,4 +13,6 @@ Modules
    :members:
 .. autoclass:: Trigram
    :members:
-.. autofunction:: pythainlp.generate.thai2fit.gen_sentence
\ No newline at end of file
+.. autofunction:: pythainlp.generate.thai2fit.gen_sentence
+.. autoclass:: pythainlp.generate.wangchanglm.WangChanGLM
+   :members:
\ No newline at end of file
diff --git a/docs/notes/installation.rst b/docs/notes/installation.rst
index fa5bdb896..c276f2b60 100644
--- a/docs/notes/installation.rst
+++ b/docs/notes/installation.rst
@@ -36,6 +36,7 @@ where ``extras`` can be
   - ``transformers_ud`` (to support transformers_ud engine)
   - ``dependency_parsing`` (to support dependency parsing with all engine)
   - ``coreference_resolution`` (to support coreference esolution with all engine)
+  - ``wangchanglm`` (to support wangchanglm model)
   - ``full`` (install everything)

 For dependency details, look at `extras` variable in `setup.py <https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py>`_.
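With the extra registered (see the setup.py hunk at the end of this patch), installation uses the usual extras syntax; the quotes guard against shell globbing:

    pip install "pythainlp[wangchanglm]"
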
diff --git a/notebooks/test-chat.ipynb b/notebooks/test-chat.ipynb
new file mode 100644
index 000000000..d3c64f3c1
--- /dev/null
+++ b/notebooks/test-chat.ipynb
@@ -0,0 +1,236 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "3ad128a6-2959-431f-b5ff-d9e15761c9c0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from pythainlp.chat.core import ChatBotModel\n",
+    "import torch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "35760aec-f47a-4d33-ad1c-a8230194180c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "chatbot = ChatBotModel()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "99129184-3a9f-4871-bfb9-ce611e80ff55",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting ds_accelerator to cuda (auto detect)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "54dd6a2c6afa41959bfb11ec98b30562",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/98 [00:00: ขอวิธีทำข้าวผัดหน่อย\\n: ')"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m.instruct_generate(instruct=\"ขอวิธีทำข้าวผัดหน่อย\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "d4fbd1e6-8a41-4b46-aa12-a23f8df9bcb0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('ข้าวผัดน้ําพริกลงเรือ', '<human>: ขอวิธีทำข้าวผัดหน่อย\\n<bot>: ')"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m.instruct_generate(instruct=\"ขอวิธีทำข้าวผัดหน่อย\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "fe71b834-4f12-406e-8a74-41829d8a7d9d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('เป้าหมายของคุณคือการลดน้ําหนักหรือไม่?', '<human>: ขอลดน้ำหนัก\\n<bot>: ')"
] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.instruct_generate(instruct=\"ขอลดน้ำหนัก\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2cd5063d-21b6-40fb-8e4e-c54fb07ac613", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('ลดน้ําหนักให้ได้ผล ต้องทําอย่างค่อยเป็นค่อยไป ปรับเปลี่ยนพฤติกรรมการกินอาหาร ออกกําลังกายอย่างสม่ําเสมอ และพักผ่อนให้เพียงพอ ที่สําคัญควรหลีกเลี่ยงอาหารที่มีแคลอรี่สูง เช่น อาหารทอด อาหารมัน อาหารที่มีน้ําตาลสูง และเครื่องดื่มแอลกอฮอล์',\n", + " ': ขอวิธีลดน้ำหนัก\\n: ')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.instruct_generate(instruct=\"ขอวิธีลดน้ำหนัก\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5b54b24-59b8-400e-89ff-0b8b67dce71f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pythainlp/chat/__init__.py b/pythainlp/chat/__init__.py index 6400a04f4..8c594795c 100644 --- a/pythainlp/chat/__init__.py +++ b/pythainlp/chat/__init__.py @@ -14,9 +14,8 @@ # limitations under the License. """ pythainlp.chat - -WIP """ -__all__ = [] +__all__ = ["ChatBotModel"] +from pythainlp.chat.core import ChatBotModel diff --git a/pythainlp/chat/core.py b/pythainlp/chat/core.py index 14c5b9cac..c155f7443 100644 --- a/pythainlp/chat/core.py +++ b/pythainlp/chat/core.py @@ -12,17 +12,63 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import torch -class Chat: + +class ChatBotModel: def __init__(self): - pass - def load_model(self, model_path,load_in_8bit=False,offload_folder="./",**): - if model_path == "wangchanglm": + """ + Chat with AI generation + """ + self.history = [] + def load_model( + self, + model_name:str="wangchanglm", + return_dict:bool=True, + load_in_8bit:bool=False, + device:str="cuda", + torch_dtype=torch.float16, + offload_folder:str="./", + low_cpu_mem_usage:bool=True + ): + """ + Load model + + :param str model_name: Model name (Now, we support wangchanglm only) + :param bool return_dict: return_dict + :param bool load_in_8bit: load model in 8bit + :param str device: device (cpu, cuda or other) + :param torch_dtype torch_dtype: torch_dtype + :param str offload_folder: offload folder + :param bool low_cpu_mem_usage: low cpu mem usage + """ + if model_name == "wangchanglm": from pythainlp.generate.wangchanglm import WangChanGLM self.model = WangChanGLM() self.model.load_model( - model_path="pythainlp/wangchanglm-7.5B-sft-en-8bit-sharded", + model_path="pythainlp/wangchanglm-7.5B-sft-en-sharded", + return_dict=return_dict, load_in_8bit=load_in_8bit, offload_folder=offload_folder, - ** - ) \ No newline at end of file + device=device, + torch_dtype=torch_dtype, + low_cpu_mem_usage=low_cpu_mem_usage + ) + else: + raise NotImplementedError(f"We doesn't support {model_name}.") + def chat(self, text:str)->str: + """ + Chatbot + + :param str text: text for asking chatbot. 
+        :return: the answer from chatbot.
+        :rtype: str
+        """
+        _temp=""
+        if self.history!=[]:
+            for h,b in self.history:
+                _temp+=self.model.PROMPT_DICT['prompt_chatbot'].format_map({"human":h,"bot":b})+self.model.stop_token
+        _temp+=self.model.PROMPT_DICT['prompt_chatbot'].format_map({"human":text,"bot":""})
+        _bot = self.model.gen_instruct(_temp)
+        self.history.append((text,_bot))
+        return _bot
diff --git a/pythainlp/generate/wangchanglm.py b/pythainlp/generate/wangchanglm.py
index b936ec4ef..77e0043e1 100644
--- a/pythainlp/generate/wangchanglm.py
+++ b/pythainlp/generate/wangchanglm.py
@@ -13,14 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
-import pandas as pd
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer


 class WangChanGLM:
     def __init__(self):
         self.exclude_pattern = re.compile(r'[^ก-๙]+')
+        self.stop_token = "\n"
         self.PROMPT_DICT = {
             "prompt_input": (
                 "<context>: {input}\n<human>: {instruction}\n<bot>: "
@@ -28,19 +27,35 @@ def __init__(self):
             "prompt_no_input": (
                 "<human>: {instruction}\n<bot>: "
             ),
+            "prompt_chatbot": (
+                "<human>: {human}\n<bot>: {bot}"
+            ),
         }
-    def is_exclude(self, text):
+    def is_exclude(self, text:str)->bool:
         return bool(self.exclude_pattern.search(text))
     def load_model(
         self,
-        model_path="pythainlp/wangchanglm-7.5B-sft-en-sharded",
-        return_dict=True,
-        load_in_8bit=False,
-        device="cuda",
+        model_path:str="pythainlp/wangchanglm-7.5B-sft-en-sharded",
+        return_dict:bool=True,
+        load_in_8bit:bool=False,
+        device:str="cuda",
         torch_dtype=torch.float16,
-        offload_folder="./",
-        low_cpu_mem_usage=True
+        offload_folder:str="./",
+        low_cpu_mem_usage:bool=True
     ):
+        """
+        Load model
+
+        :param str model_path: Model path
+        :param bool return_dict: return_dict
+        :param bool load_in_8bit: load model in 8bit
+        :param str device: device (cpu, cuda or other)
+        :param torch_dtype torch_dtype: torch_dtype
+        :param str offload_folder: offload folder
+        :param bool low_cpu_mem_usage: low cpu mem usage
+        """
+        import pandas as pd
+        from transformers import AutoModelForCausalLM, AutoTokenizer
         self.device = device
         self.torch_dtype = torch_dtype
         self.model_path = model_path
@@ -59,16 +74,31 @@ def load_model(
         self.exclude_ids = self.df[self.df.is_exclude==True].idx.tolist()
     def gen_instruct(
         self,
-        text,
-        max_new_tokens=512,
-        top_p=0.95,
-        temperature=0.9,
-        top_k=50,
-        no_repeat_ngram_size=2,
-        typical_p=1.,
-        thai_only=True,
-        skip_special_tokens=True
+        text:str,
+        max_new_tokens:int=512,
+        top_p:float=0.95,
+        temperature:float=0.9,
+        top_k:int=50,
+        no_repeat_ngram_size:int=2,
+        typical_p:float=1.,
+        thai_only:bool=True,
+        skip_special_tokens:bool=True
     ):
+        """
+        Generate Instruct
+
+        :param str text: text
+        :param int max_new_tokens: max new tokens
+        :param float top_p: Top p
+        :param float temperature: temperature
+        :param int top_k: Top k
+        :param int no_repeat_ngram_size: no repeat ngram size
+        :param float typical_p: typical p
+        :param bool thai_only: Thai only
+        :param bool skip_special_tokens: skip special tokens
+        :return: the answer from Instruct.
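A short smoke test of the chat API that this patch documents (editorial sketch, not part of the diff, under the same CPU/bfloat16 assumptions as the earlier example):

    import torch
    from pythainlp.chat import ChatBotModel

    chatbot = ChatBotModel()
    # "wangchanglm" is the only model_name load_model() accepts at this point.
    chatbot.load_model(model_name="wangchanglm", device="cpu", torch_dtype=torch.bfloat16)
    # The (question, answer) pair is also appended to chatbot.history.
    print(chatbot.chat("สวัสดีครับ"))
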
+        :rtype: str
+        """
         batch = self.tokenizer(text, return_tensors="pt")
         with torch.autocast(device_type=self.device, dtype=self.torch_dtype):
             if thai_only:
@@ -102,11 +132,28 @@ def instruct_generate(
         max_new_tokens=512,
         temperature: float =0.9,
         top_p: float = 0.95,
-        top_k=50,
-        no_repeat_ngram_size=2,
-        typical_p=1,
-        thai_only=True
+        top_k:int=50,
+        no_repeat_ngram_size:int=2,
+        typical_p:float=1,
+        thai_only:bool=True,
+        skip_special_tokens:bool=True
     ):
+        """
+        Generate Instruct
+
+        :param str instruct: Instruct
+        :param str context: context
+        :param int max_new_tokens: max new tokens
+        :param float top_p: Top p
+        :param float temperature: temperature
+        :param int top_k: Top k
+        :param int no_repeat_ngram_size: no repeat ngram size
+        :param float typical_p: typical p
+        :param bool thai_only: Thai only
+        :param bool skip_special_tokens: skip special tokens
+        :return: the answer from Instruct.
+        :rtype: str
+        """
         if context == None or context=="":
             prompt = self.PROMPT_DICT['prompt_no_input'].format_map(
                 {'instruction': instruct, 'input': ''}
@@ -123,6 +170,7 @@ def instruct_generate(
             temperature=temperature,
             no_repeat_ngram_size=no_repeat_ngram_size,
             typical_p=typical_p,
-            thai_only=thai_only
+            thai_only=thai_only,
+            skip_special_tokens=skip_special_tokens
         )
-        return result,prompt
\ No newline at end of file
+        return result
diff --git a/setup.py b/setup.py
index 7da84e696..423459f50 100644
--- a/setup.py
+++ b/setup.py
@@ -117,6 +117,11 @@
     "word_approximation":{
         "panphon>=0.20.0"
     },
+    "wangchanglm": [
+        "transformers>=4.6.0",
+        "sentencepiece>=0.1.91",
+        "pandas>=0.24"
+    ],
     "full": [
         "PyYAML>=5.3.1",
         "attacut>=1.0.4",

From 5522ac2c3f9a31a9bb6e7c9bee277b013755ae74 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 13 Jul 2023 22:10:16 -0700
Subject: [PATCH 5/5] Add reset chat

---
 pythainlp/chat/core.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pythainlp/chat/core.py b/pythainlp/chat/core.py
index c155f7443..8eed4685e 100644
--- a/pythainlp/chat/core.py
+++ b/pythainlp/chat/core.py
@@ -21,6 +21,11 @@ def __init__(self):
         Chat with AI generation
         """
         self.history = []
+    def reset_chat(self):
+        """
+        Reset chat by clearing the history
+        """
+        self.history = []
     def load_model(
         self,
         model_name:str="wangchanglm",
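With the full series applied, a session can carry history across turns and be reset between topics (sketch, continuing the `chatbot` object from the previous example; the Thai prompts are illustrative):

    chatbot.chat("ขอวิธีลดน้ำหนัก")           # turn 1
    chatbot.chat("ช่วยแนะนำเมนูอาหารหน่อย")   # turn 2 sees turn 1 via the prompt_chatbot history
    chatbot.reset_chat()                      # wipe chatbot.history to start a fresh conversation
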