From bbd5ba998ed09d81ca52db919a3b0354cd70d170 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun <wannaphong@yahoo.com>
Date: Fri, 9 Oct 2020 20:31:49 +0700
Subject: [PATCH 1/7] Add model option to attacut segment()

---
 pythainlp/tokenize/attacut.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/pythainlp/tokenize/attacut.py b/pythainlp/tokenize/attacut.py
index 05be19fdd..c36284c5f 100644
--- a/pythainlp/tokenize/attacut.py
+++ b/pythainlp/tokenize/attacut.py
@@ -7,16 +7,22 @@
 """
 from typing import List
 
-from attacut import tokenize
+from attacut import Tokenizer
+_MODEL_NAME = "attacut-sc"
+_tokenize = Tokenizer(model=_MODEL_NAME)
 
-
-def segment(text: str) -> List[str]:
+def segment(text: str, model: str = "attacut-sc") -> List[str]:
     """
     Wrapper for AttaCut - Fast and Reasonably Accurate Word Tokenizer for Thai
     :param str text: text to be tokenized to words
+    :param str model:  word tokenizer model to be tokenized to words
     :return: list of words, tokenized from the text
     """
     if not text or not isinstance(text, str):
         return []
 
-    return tokenize(text)
+    if model != _MODEL_NAME:
+        _MODEL_NAME = model
+        _tokenize = Tokenizer(model=_MODEL_NAME)
+
+    return _tokenize.tokenize(text)

From 3939dcc1dbd0c1b98e45a488ed532dadd4600085 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun <wannaphong@yahoo.com>
Date: Sat, 10 Oct 2020 01:37:02 +0700
Subject: [PATCH 2/7] Update Options for model (attacut)

---
 pythainlp/tokenize/attacut.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pythainlp/tokenize/attacut.py b/pythainlp/tokenize/attacut.py
index c36284c5f..258bfa260 100644
--- a/pythainlp/tokenize/attacut.py
+++ b/pythainlp/tokenize/attacut.py
@@ -17,6 +17,10 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
     :param str text: text to be tokenized to words
     :param str model:  word tokenizer model to be tokenized to words
     :return: list of words, tokenized from the text
+    :rtype: list[str]
+    **Options for model**
+        * *attacut-sc* (default)
+        * *attacut-c*
     """
     if not text or not isinstance(text, str):
         return []

From 68cdd963e57e735e28ecbea8cdb8aa0d634ac834 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun <wannaphong@yahoo.com>
Date: Sat, 10 Oct 2020 01:39:54 +0700
Subject: [PATCH 3/7] fixed PEP8

---
 pythainlp/tokenize/attacut.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pythainlp/tokenize/attacut.py b/pythainlp/tokenize/attacut.py
index 258bfa260..9441a47a7 100644
--- a/pythainlp/tokenize/attacut.py
+++ b/pythainlp/tokenize/attacut.py
@@ -11,6 +11,7 @@
 _MODEL_NAME = "attacut-sc"
 _tokenize = Tokenizer(model=_MODEL_NAME)
 
+
 def segment(text: str, model: str = "attacut-sc") -> List[str]:
     """
     Wrapper for AttaCut - Fast and Reasonably Accurate Word Tokenizer for Thai

From e82f769734040f5d6bd1ec935b8c5e654d47843a Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun <wannaphong@yahoo.com>
Date: Sat, 10 Oct 2020 01:46:32 +0700
Subject: [PATCH 4/7] Declare _MODEL_NAME and _tokenize as global in segment()

---
 pythainlp/tokenize/attacut.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pythainlp/tokenize/attacut.py b/pythainlp/tokenize/attacut.py
index 9441a47a7..44971e5f1 100644
--- a/pythainlp/tokenize/attacut.py
+++ b/pythainlp/tokenize/attacut.py
@@ -23,6 +23,7 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
         * *attacut-sc* (default)
         * *attacut-c*
     """
+    global _MODEL_NAME, _tokenize
     if not text or not isinstance(text, str):
         return []
 

From c4576b9585a6ed2da49ffd1a33ac19d809a688fa Mon Sep 17 00:00:00 2001
From: Arthit Suriyawongkul <arthit@gmail.com>
Date: Sat, 10 Oct 2020 07:51:13 +0100
Subject: [PATCH 5/7] Add more info about model options

---
 pythainlp/tokenize/attacut.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/pythainlp/tokenize/attacut.py b/pythainlp/tokenize/attacut.py
index 44971e5f1..48a3e2c25 100644
--- a/pythainlp/tokenize/attacut.py
+++ b/pythainlp/tokenize/attacut.py
@@ -8,8 +8,9 @@
 from typing import List
 
 from attacut import Tokenizer
+
 _MODEL_NAME = "attacut-sc"
-_tokenize = Tokenizer(model=_MODEL_NAME)
+_tokenizer = Tokenizer(model=_MODEL_NAME)
 
 
 def segment(text: str, model: str = "attacut-sc") -> List[str]:
@@ -20,15 +21,15 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
     :return: list of words, tokenized from the text
     :rtype: list[str]
     **Options for model**
-        * *attacut-sc* (default)
-        * *attacut-c*
+        * *attacut-sc* (default) using both syllable and character features
+        * *attacut-c* using only character feature
     """
-    global _MODEL_NAME, _tokenize
+    global _MODEL_NAME, _tokenizer
     if not text or not isinstance(text, str):
         return []
 
     if model != _MODEL_NAME:
         _MODEL_NAME = model
-        _tokenize = Tokenizer(model=_MODEL_NAME)
+        _tokenizer = Tokenizer(model=_MODEL_NAME)
 
-    return _tokenize.tokenize(text)
+    return _tokenizer.tokenize(text)

From c0831698e2e127d8d437a2c0aceda0dd091b70bd Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun <wannaphong@yahoo.com>
Date: Tue, 20 Oct 2020 21:54:49 +0700
Subject: [PATCH 6/7] Replace module-level tokenizer with attacut wrapper class

---
 pythainlp/tokenize/attacut.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/pythainlp/tokenize/attacut.py b/pythainlp/tokenize/attacut.py
index 48a3e2c25..0c4881689 100644
--- a/pythainlp/tokenize/attacut.py
+++ b/pythainlp/tokenize/attacut.py
@@ -9,8 +9,21 @@
 
 from attacut import Tokenizer
 
-_MODEL_NAME = "attacut-sc"
-_tokenizer = Tokenizer(model=_MODEL_NAME)
+
+class attacut:
+    def __init__(self, model= "attacut-sc"):
+        if model == "attacut-sc":
+            self.load_attacut_sc()
+        else:
+            self.load_attacut_c()
+    def tokenize(self,text:str) -> List[str]:
+        return self._tokenizer.tokenize(text)
+    def load_attacut_sc(self):
+        self._MODEL_NAME = "attacut-sc"
+        self._tokenizer = Tokenizer(model=self._MODEL_NAME)
+    def load_attacut_c(self):
+        self._MODEL_NAME = "attacut-c"
+        self._tokenizer = Tokenizer(model=self._MODEL_NAME)
 
 
 def segment(text: str, model: str = "attacut-sc") -> List[str]:
@@ -24,12 +37,9 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
         * *attacut-sc* (default) using both syllable and character features
         * *attacut-c* using only character feature
     """
-    global _MODEL_NAME, _tokenizer
     if not text or not isinstance(text, str):
         return []
 
-    if model != _MODEL_NAME:
-        _MODEL_NAME = model
-        _tokenizer = Tokenizer(model=_MODEL_NAME)
+    _tokenizer = attacut(model)
 
     return _tokenizer.tokenize(text)

From 2b618716a3a2985aa959a598f2b8ef700b560d20 Mon Sep 17 00:00:00 2001
From: Arthit Suriyawongkul <arthit@gmail.com>
Date: Mon, 7 Dec 2020 12:18:04 +0700
Subject: [PATCH 7/7] Simplify attacut class

---
 pythainlp/tokenize/attacut.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/pythainlp/tokenize/attacut.py b/pythainlp/tokenize/attacut.py
index 0c4881689..0c42130eb 100644
--- a/pythainlp/tokenize/attacut.py
+++ b/pythainlp/tokenize/attacut.py
@@ -10,20 +10,17 @@
 from attacut import Tokenizer
 
 
-class attacut:
-    def __init__(self, model= "attacut-sc"):
-        if model == "attacut-sc":
-            self.load_attacut_sc()
-        else:
-            self.load_attacut_c()
-    def tokenize(self,text:str) -> List[str]:
-        return self._tokenizer.tokenize(text)
-    def load_attacut_sc(self):
+class AttacutTokenizer:
+    def __init__(self, model="attacut-sc"):
         self._MODEL_NAME = "attacut-sc"
+
+        if model == "attacut-c":
+            self._MODEL_NAME = "attacut-c"
+
         self._tokenizer = Tokenizer(model=self._MODEL_NAME)
-    def load_attacut_c(self):
-        self._MODEL_NAME = "attacut-c"
-        self._tokenizer = Tokenizer(model=self._MODEL_NAME)
+
+    def tokenize(self, text: str) -> List[str]:
+        return self._tokenizer.tokenize(text)
 
 
 def segment(text: str, model: str = "attacut-sc") -> List[str]:
@@ -40,6 +37,6 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
     if not text or not isinstance(text, str):
         return []
 
-    _tokenizer = attacut(model)
+    _tokenizer = AttacutTokenizer(model)
 
     return _tokenizer.tokenize(text)