Skip to content

Commit c083169

Browse files
committed
Update attacut.py
1 parent c4576b9 commit c083169

File tree

1 file changed

+16
-6
lines changed

1 file changed

+16
-6
lines changed

pythainlp/tokenize/attacut.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,21 @@
99

1010
from attacut import Tokenizer
1111

12-
_MODEL_NAME = "attacut-sc"
13-
_tokenizer = Tokenizer(model=_MODEL_NAME)
12+
13+
class attacut:
    """Wrapper that owns one AttaCut ``Tokenizer`` bound to a model name.

    The tokenizer is built when the object is constructed. Passing
    ``"attacut-sc"`` loads the ``attacut-sc`` model; any other value
    loads ``attacut-c``.
    """

    def __init__(self, model="attacut-sc"):
        """Pick and run the loader that matches *model*."""
        if model == "attacut-sc":
            loader = self.load_attacut_sc
        else:
            # Every name other than "attacut-sc" falls back to attacut-c.
            loader = self.load_attacut_c
        loader()

    def tokenize(self, text: str) -> List[str]:
        """Return whatever the wrapped tokenizer's ``tokenize`` yields."""
        tokenizer = self._tokenizer
        return tokenizer.tokenize(text)

    def load_attacut_sc(self):
        """Record the name ``attacut-sc`` and build its tokenizer."""
        self._use_model("attacut-sc")

    def load_attacut_c(self):
        """Record the name ``attacut-c`` and build its tokenizer."""
        self._use_model("attacut-c")

    def _use_model(self, name):
        """Store *name* first, then create the tokenizer for it."""
        self._MODEL_NAME = name
        self._tokenizer = Tokenizer(model=name)
1427

1528

1629
def segment(text: str, model: str = "attacut-sc") -> List[str]:
@@ -24,12 +37,9 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
2437
* *attacut-sc* (default) using both syllable and character features
2538
* *attacut-c* using only character feature
2639
"""
27-
global _MODEL_NAME, _tokenizer
2840
if not text or not isinstance(text, str):
2941
return []
3042

31-
if model != _MODEL_NAME:
32-
_MODEL_NAME = model
33-
_tokenizer = Tokenizer(model=_MODEL_NAME)
43+
_tokenizer = attacut(model)
3444

3545
return _tokenizer.tokenize(text)

0 commit comments

Comments
 (0)