 
 from attacut import Tokenizer
 
-_MODEL_NAME = "attacut-sc"
-_tokenizer = Tokenizer(model=_MODEL_NAME)
+
+class attacut:
+    def __init__(self, model="attacut-sc"):
+        if model == "attacut-sc":
+            self.load_attacut_sc()
+        else:
+            self.load_attacut_c()
+    def tokenize(self, text: str) -> List[str]:
+        return self._tokenizer.tokenize(text)
+    def load_attacut_sc(self):
+        self._MODEL_NAME = "attacut-sc"
+        self._tokenizer = Tokenizer(model=self._MODEL_NAME)
+    def load_attacut_c(self):
+        self._MODEL_NAME = "attacut-c"
+        self._tokenizer = Tokenizer(model=self._MODEL_NAME)
 
 
 def segment(text: str, model: str = "attacut-sc") -> List[str]:
@@ -24,12 +37,9 @@ def segment(text: str, model: str = "attacut-sc") -> List[str]:
         * *attacut-sc* (default) using both syllable and character features
         * *attacut-c* using only character feature
     """
-    global _MODEL_NAME, _tokenizer
     if not text or not isinstance(text, str):
         return []
 
-    if model != _MODEL_NAME:
-        _MODEL_NAME = model
-        _tokenizer = Tokenizer(model=_MODEL_NAME)
+    _tokenizer = attacut(model)
 
     return _tokenizer.tokenize(text)
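
For context, here is a minimal usage sketch of the refactored code. It assumes this module is importable as pythainlp.tokenize.attacut and that the attacut model files are already installed; neither the import path nor the sample text appears in the diff above.

# Minimal usage sketch (assumed import path; not part of the diff above).
from pythainlp.tokenize.attacut import attacut, segment

# One-off call: segment() builds a fresh attacut wrapper, so the default
# "attacut-sc" model is loaded for this call.
print(segment("ทดสอบการตัดคำภาษาไทย"))

# Reusing the wrapper directly avoids constructing a new Tokenizer
# (and reloading model weights) on every call.
tokenizer = attacut("attacut-c")
print(tokenizer.tokenize("ทดสอบการตัดคำภาษาไทย"))

Because the new segment() constructs the wrapper on every call, rather than caching a module-level tokenizer as before, callers that tokenize many strings may prefer holding on to a single attacut instance as above.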