Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions pythainlp/tokenize/attacut.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,36 @@
"""
from typing import List

from attacut import tokenize
from attacut import Tokenizer


def segment(text: str) -> List[str]:
class AttacutTokenizer:
    """Thin wrapper around :class:`attacut.Tokenizer`.

    Holds a single pre-loaded AttaCut model so repeated calls to
    :meth:`tokenize` reuse the same underlying tokenizer instance.
    """

    def __init__(self, model="attacut-sc"):
        # Only "attacut-c" is accepted as an alternative; any other
        # value silently falls back to the default "attacut-sc" model.
        self._MODEL_NAME = "attacut-c" if model == "attacut-c" else "attacut-sc"
        self._tokenizer = Tokenizer(model=self._MODEL_NAME)

    def tokenize(self, text: str) -> List[str]:
        """Split *text* into a list of word tokens using the loaded model."""
        return self._tokenizer.tokenize(text)


def segment(text: str, model: str = "attacut-sc") -> List[str]:
    """
    Wrapper for AttaCut - Fast and Reasonably Accurate Word Tokenizer for Thai
    :param str text: text to be tokenized to words
    :param str model: word tokenizer model to be tokenized to words
    :return: list of words, tokenized from the text
    :rtype: list[str]
    **Options for model**
        * *attacut-sc* (default) using both syllable and character features
        * *attacut-c* using only character feature
    """
    # Guard clause: non-string or empty input yields no tokens.
    if not isinstance(text, str) or not text:
        return []

    return AttacutTokenizer(model).tokenize(text)