diff --git a/docker_requirements.txt b/docker_requirements.txt
index 69d452c00..3ae30d4bd 100644
--- a/docker_requirements.txt
+++ b/docker_requirements.txt
@@ -23,5 +23,6 @@ fairseq==0.10.2
 pyicu==2.6
 deepcut==0.7.0.0
 h5py==2.10.0
-tensorflow==2.4.2
-pandas==0.24
\ No newline at end of file
+tensorflow==2.4.0
+pandas==0.24
+tltk==1.3.8
diff --git a/docs/api/tag.rst b/docs/api/tag.rst
index 87cf0a766..2ab526c60 100644
--- a/docs/api/tag.rst
+++ b/docs/api/tag.rst
@@ -232,6 +232,7 @@ Modules
 .. autofunction:: chunk_parse
 .. autoclass:: pythainlp.tag.named_entity.ThaiNameTagger
    :members: get_ner
+.. autofunction:: pythainlp.tag.tltk.get_ner
 
 Tagger Engines
 --------------
diff --git a/docs/notes/installation.rst b/docs/notes/installation.rst
index d48354d6e..9b5669ae8 100644
--- a/docs/notes/installation.rst
+++ b/docs/notes/installation.rst
@@ -27,6 +27,7 @@ where ``extras`` can be
   - ``mt5`` (to mt5 models for Thai text summarizer)
   - ``wordnet`` (to support wordnet)
   - ``spell`` (to support phunspell & symspellpy)
+  - ``tltk`` (to support tltk)
   - ``full`` (install everything)
 
 For dependency details, look at `extras` variable in `setup.py `_.
diff --git a/pythainlp/spell/core.py b/pythainlp/spell/core.py
index a749fa61c..65d6ca54c 100644
--- a/pythainlp/spell/core.py
+++ b/pythainlp/spell/core.py
@@ -22,6 +22,7 @@ def spell(word: str, engine: str = "pn") -> List[str]:
         * *pn* - Peter Norvig's algorithm [#norvig_spellchecker]_ (default)
         * *phunspell* - A spell checker utilizing spylls a port of Hunspell.
         * *symspellpy* - symspellpy is a Python port of SymSpell v6.5.
+        * *tltk* - wrapper for `TLTK `_.,
     :return: list of possible correct words within 1 or 2 edit distance and
              sorted by frequency of word occurrences in the spelling dictionary
 
@@ -39,6 +40,9 @@ def spell(word: str, engine: str = "pn") -> List[str]:
         spell("เส้นตรบ")
         # output: ['เส้นตรง']
 
+        spell("เส้นตรบ", engine="tltk")
+        # output: ['เส้นตรง']
+
         spell("ครัช")
         # output: ['ครับ', 'ครัว', 'รัช', 'ครัม', 'ครัน', 'วรัช', 'ครัส',
         # 'ปรัช', 'บรัช', 'ครัง', 'คัช', 'คลัช', 'ครัย', 'ครัด']
@@ -58,6 +62,9 @@ def spell(word: str, engine: str = "pn") -> List[str]:
     elif engine == "symspellpy":
         from pythainlp.spell.symspellpy import spell as SPELL_CHECKER
         text_correct = SPELL_CHECKER(word)
+    elif engine == "tltk":
+        from pythainlp.spell.tltk import spell as SPELL_CHECKER
+        text_correct = SPELL_CHECKER(word)
     else:
         text_correct = DEFAULT_SPELL_CHECKER.spell(word)
 
diff --git a/pythainlp/spell/tltk.py b/pythainlp/spell/tltk.py
new file mode 100644
index 000000000..6a739b837
--- /dev/null
+++ b/pythainlp/spell/tltk.py
@@ -0,0 +1,6 @@
+from tltk.nlp import spell_candidates
+from typing import List
+
+
+def spell(text: str) -> List[str]:
+    return spell_candidates(text)
diff --git a/pythainlp/tag/pos_tag.py b/pythainlp/tag/pos_tag.py
index 97f1a6d70..8cd007987 100644
--- a/pythainlp/tag/pos_tag.py
+++ b/pythainlp/tag/pos_tag.py
@@ -15,6 +15,8 @@ def pos_tag(
         * *wangchanberta* - wangchanberta model (support lst20 corpus only \
           and it supports a string only. if you input a list of word, \
          it will convert list word to a string.
+        * *tltk* - TLTK: Thai Language Toolkit (supports the TNC corpus only; \
+          if another corpus is chosen, it will fall back to TNC.)
     :param str corpus: the corpus that used to create the language model for tagger
         * *lst20* - `LST20 `_ corpus \
@@ -28,6 +30,7 @@
         * *pud* - `Parallel Universal Dependencies (PUD)\
          `_ \
          treebanks, natively use Universal POS tags
+        * *tnc* - Thai National Corpus (supports the tltk engine only)
     :return: a list of tuples (word, POS tag)
     :rtype: list[tuple[str, str]]
@@ -89,13 +92,25 @@
     if not words:
         return []
 
-    if engine == "perceptron":
+    _support_corpus = ["lst20", "lst20_ud", "orchid", "orchid_ud", "pud"]
+
+    if engine == "perceptron" and corpus in _support_corpus:
         from pythainlp.tag.perceptron import tag as tag_
     elif engine == "wangchanberta" and corpus == "lst20":
         from pythainlp.wangchanberta.postag import pos_tag as tag_
         words = ''.join(words)
-    else:  # default, use "unigram" ("old") engine
+    elif engine == "tltk":
+        from pythainlp.tag.tltk import pos_tag as tag_
+        corpus = "tnc"
+    elif engine == "unigram" and corpus in _support_corpus:  # default
         from pythainlp.tag.unigram import tag as tag_
+    else:
+        raise ValueError(
+            "pos_tag does not support {0} engine or {1} corpus.".format(
+                engine,
+                corpus
+            )
+        )
 
     word_tags = tag_(words, corpus=corpus)
 
@@ -114,6 +129,9 @@ def pos_tag_sents(
     :param str engine:
         * *perceptron* - perceptron tagger (default)
         * *unigram* - unigram tagger
+        * *wangchanberta* - wangchanberta model (support lst20 corpus only)
+        * *tltk* - TLTK: Thai Language Toolkit (supports the TNC corpus only; \
+          if another corpus is chosen, it will fall back to TNC.)
     :param str corpus: the corpus that used to create the language model for tagger
         * *lst20* - `LST20 `_ corpus \
@@ -127,6 +145,7 @@
         * *pud* - `Parallel Universal Dependencies (PUD)\
          `_ \
          treebanks, natively use Universal POS tags
+        * *tnc* - Thai National Corpus (supports the tltk engine only)
     :return: a list of lists of tuples (word, POS tag)
     :rtype: list[list[tuple[str, str]]]
diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py
new file mode 100644
index 000000000..bfb5bacb9
--- /dev/null
+++ b/pythainlp/tag/tltk.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+from typing import List, Tuple, Union
+from tltk import nlp
+from pythainlp.tokenize import word_tokenize
+
+nlp.pos_load()
+nlp.ner_load()
+
+
+def pos_tag(words: List[str], corpus: str = "tnc") -> List[Tuple[str, str]]:
+    if corpus != "tnc":
+        raise ValueError("tltk does not support the {0} corpus.".format(corpus))
+    return nlp.pos_tag_wordlist(words)
+
+
+def _post_process(text: str) -> str:
+    return text.replace("<u/>", " ")
+
+
+def get_ner(
+    text: str,
+    pos: bool = True,
+    tag: bool = False
+) -> Union[List[Tuple[str, str]], List[Tuple[str, str, str]], str]:
+    """
+    Named-entity recognizer from **TLTK**
+
+    This function tags named entities in text, in IOB format.
+
+    :param str text: text in Thai to be tagged
+    :param bool pos: To include POS tags in the results (`True`) or
+                     exclude them (`False`). The default value is `True`
+    :param bool tag: return the output as an HTML-like tag string.
+    :return: a list of tuples of the tokenized word, NER tag, and
+             POS tag (if the parameter `pos` is specified as `True`),
+             or an HTML-like tag string (if the parameter `tag` is
+             specified as `True`).
+             Otherwise, returns a list of tuples of the tokenized
+             word and NER tag
+    :rtype: Union[list[tuple[str, str]], list[tuple[str, str, str]], str]
+
+    :Example:
+
+        >>> from pythainlp.tag.tltk import get_ner
+        >>> get_ner("เขาเรียนที่โรงเรียนนางรอง")
+        [('เขา', 'PRON', 'O'),
+        ('เรียน', 'VERB', 'O'),
+        ('ที่', 'SCONJ', 'O'),
+        ('โรงเรียน', 'NOUN', 'B-L'),
+        ('นางรอง', 'VERB', 'I-L')]
+        >>> get_ner("เขาเรียนที่โรงเรียนนางรอง", pos=False)
+        [('เขา', 'O'),
+        ('เรียน', 'O'),
+        ('ที่', 'O'),
+        ('โรงเรียน', 'B-L'),
+        ('นางรอง', 'I-L')]
+        >>> get_ner("เขาเรียนที่โรงเรียนนางรอง", tag=True)
+        'เขาเรียนที่<L>โรงเรียนนางรอง</L>'
+    """
+    if not text:
+        return []
+    list_word = []
+    for i in word_tokenize(text, engine="tltk"):
+        if i == " ":
+            i = "<u/>"
+        list_word.append(i)
+    _pos = nlp.pos_tag_wordlist(list_word)
+    sent_ner = [
+        (_post_process(word), pos, ner) for word, pos, ner in nlp.ner(_pos)
+    ]
+    if tag:
+        temp = ""
+        sent = ""
+        for idx, (word, pos, ner) in enumerate(sent_ner):
+            if ner.startswith("B-") and temp != "":
+                sent += "</" + temp + ">"
+                temp = ner[2:]
+                sent += "<" + temp + ">"
+            elif ner.startswith("B-"):
+                temp = ner[2:]
+                sent += "<" + temp + ">"
+            elif ner == "O" and temp != "":
+                sent += "</" + temp + ">"
+                temp = ""
+            sent += word
+
+            if idx == len(sent_ner) - 1 and temp != "":
+                sent += "</" + temp + ">"
+
+        return sent
+    if pos is False:
+        return [(word, ner) for word, pos, ner in sent_ner]
+    return sent_ner
diff --git a/pythainlp/tokenize/core.py b/pythainlp/tokenize/core.py
index c5a501230..41f03f2e6 100644
--- a/pythainlp/tokenize/core.py
+++ b/pythainlp/tokenize/core.py
@@ -86,6 +86,8 @@ def word_tokenize(
       and combining tokens that are parts of the same named-entity.
     * *sefr_cut* - wrapper for
       `SEFR CUT `_.,
+    * *tltk* - wrapper for
+      `TLTK `_.,
 
     :Note:
         - The parameter **custom_dict** can be provided as an argument \
@@ -182,6 +184,10 @@ def word_tokenize(
     elif engine == "sefr_cut":
         from pythainlp.tokenize.sefr_cut import segment
 
+        segments = segment(text)
+    elif engine == "tltk":
+        from pythainlp.tokenize.tltk import segment
+
         segments = segment(text)
     else:
         raise ValueError(
@@ -215,6 +221,7 @@ def sent_tokenize(
     * *whitespace+newline* - split by whitespaces and newline.
     * *whitespace* - split by whitespaces. Specifiaclly, with \
       :class:`regex` pattern ``r" +"``
+    * *tltk* - split by `TLTK `_.,
 
     :Example:
 
    Split the text based on *whitespace*::
@@ -271,6 +278,10 @@ def sent_tokenize(
         segments = re.split(r" +", text, re.U)
     elif engine == "whitespace+newline":
         segments = text.split()
+    elif engine == "tltk":
+        from pythainlp.tokenize.tltk import sent_tokenize as segment
+
+        segments = segment(text)
     else:
         raise ValueError(
             f"""Tokenizer \"{engine}\" not found.
@@ -314,6 +325,7 @@ def subword_tokenize(
     * *wangchanberta* - SentencePiece from wangchanberta model.
     * *dict* - newmm word tokenizer with a syllable dictionary
     * *ssg* - CRF syllable segmenter for Thai
+    * *tltk* - syllable tokenizer from tltk
 
     :Example:
 
@@ -376,6 +388,8 @@ def subword_tokenize(
         )
     elif engine == "ssg":
         from pythainlp.tokenize.ssg import segment
+    elif engine == "tltk":
+        from pythainlp.tokenize.tltk import syllable_tokenize as segment
     else:
         raise ValueError(
             f"""Tokenizer \"{engine}\" not found.
diff --git a/pythainlp/tokenize/tltk.py b/pythainlp/tokenize/tltk.py
new file mode 100644
index 000000000..2199edfa8
--- /dev/null
+++ b/pythainlp/tokenize/tltk.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+from typing import List
+from tltk.nlp import word_segment as tltk_segment
+from tltk.nlp import syl_segment
+
+
+def segment(text: str) -> List[str]:
+    if not text or not isinstance(text, str):
+        return []
+    text = text.replace(" ", "<u/>")
+    _temp = tltk_segment(text).replace("<u/>", " ").replace("<s/>", "")
+    _temp = _temp.split('|')
+    if _temp[-1] == "":
+        del _temp[-1]
+    return _temp
+
+
+def syllable_tokenize(text: str) -> List[str]:
+    if not text or not isinstance(text, str):
+        return []
+    _temp = syl_segment(text)
+    _temp = _temp.split('~')
+    if _temp[-1] == "":
+        del _temp[-1]
+    return _temp
+
+
+def sent_tokenize(text: str) -> List[str]:
+    text = text.replace(" ", "<u/>")
+    _temp = tltk_segment(text).replace("<u/>", " ").replace("|", "")
+    _temp = _temp.split('<s/>')
+    if _temp[-1] == "":
+        del _temp[-1]
+    return _temp
diff --git a/pythainlp/transliterate/core.py b/pythainlp/transliterate/core.py
index 5460eadd7..02c59ead7 100644
--- a/pythainlp/transliterate/core.py
+++ b/pythainlp/transliterate/core.py
@@ -23,6 +23,7 @@ def romanize(text: str, engine: str = DEFAULT_ROMANIZE_ENGINE) -> str:
       Transcription issued by Royal Institute of Thailand.
     * *thai2rom* - a deep learning-based Thai romanization engine
       (require PyTorch).
+    * *tltk* - TLTK: Thai Language Toolkit
 
     :Example:
     ::
@@ -35,6 +36,9 @@ def romanize(text: str, engine: str = DEFAULT_ROMANIZE_ENGINE) -> str:
         romanize("สามารถ", engine="thai2rom")
         # output: 'samat'
 
+        romanize("สามารถ", engine="tltk")
+        # output: 'samat'
+
         romanize("ภาพยนตร์", engine="royin")
         # output: 'phapn'
 
@@ -47,6 +51,8 @@ def romanize(text: str, engine: str = DEFAULT_ROMANIZE_ENGINE) -> str:
 
     if engine == "thai2rom":
         from pythainlp.transliterate.thai2rom import romanize
+    elif engine == "tltk":
+        from pythainlp.transliterate.tltk import romanize
     else:  # use default engine "royin"
         from pythainlp.transliterate.royin import romanize
 
@@ -67,10 +73,13 @@ def transliterate(
     :rtype: str
 
     :Options for engines:
-        * *icu* - pyicu, based on International Components for Unicode (ICU)
-        * *ipa* - epitran, output is International Phonetic Alphabet (IPA)
         * *thaig2p* - (default) Thai Grapheme-to-Phoneme,
          output is IPA (require PyTorch)
+        * *icu* - pyicu, based on International Components for Unicode (ICU)
+        * *ipa* - epitran, output is International Phonetic Alphabet (IPA)
+        * *tltk_g2p* - Thai Grapheme-to-Phoneme from\
+          `TLTK `_.,
+        * *tltk_ipa* - tltk, output is International Phonetic Alphabet (IPA)
 
     :Example:
     ::
@@ -86,6 +95,12 @@ def transliterate(
         transliterate("สามารถ", engine="thaig2p")
         # output: 's aː ˩˩˦ . m aː t̚ ˥˩'
 
+        transliterate("สามารถ", engine="tltk_ipa")
+        # output: 'saː5.maːt3'
+
+        transliterate("สามารถ", engine="tltk_g2p")
+        # output: 'saa4~maat2'
+
         transliterate("ภาพยนตร์", engine="icu")
         # output: 'p̣hāphyntr̒'
 
@@ -103,6 +118,10 @@ def transliterate(
         from pythainlp.transliterate.pyicu import transliterate
     elif engine == "ipa":
         from pythainlp.transliterate.ipa import transliterate
+    elif engine == "tltk_g2p":
+        from pythainlp.transliterate.tltk import tltk_g2p as transliterate
+    elif engine == "tltk_ipa":
+        from pythainlp.transliterate.tltk import tltk_ipa as transliterate
     else:  # use default engine: "thaig2p"
         from pythainlp.transliterate.thaig2p import transliterate
diff --git a/pythainlp/transliterate/tltk.py b/pythainlp/transliterate/tltk.py
new file mode 100644
index 000000000..8795ce756
--- /dev/null
+++ b/pythainlp/transliterate/tltk.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+from tltk.nlp import g2p, th2ipa, th2roman
+
+
+def romanize(text: str) -> str:
+    _temp = th2roman(text)
+    return _temp[:_temp.rfind(" ")].replace("<s/>", "")
+
+
+def tltk_g2p(text: str) -> str:
+    _temp = g2p(text).split("<tr/>")[1].replace("|<s/>", "").replace("|", " ")
+    return _temp.replace("<s/>", "")
+
+
+def tltk_ipa(text: str) -> str:
+    _temp = th2ipa(text)
+    return _temp[:_temp.rfind(" ")].replace("<s/>", "")
diff --git a/setup.py b/setup.py
index e0597f58b..e7c4fb5ab 100644
--- a/setup.py
+++ b/setup.py
@@ -73,6 +73,7 @@
         "spylls>=0.1.5",
         "symspellpy>=6.7.0"
     ],
+    "tltk": ["tltk>=1.3.8"],
     "full": [
         "PyYAML>=5.3.1",
         "attacut>=1.0.4",
@@ -94,7 +95,8 @@
         "sefr_cut>=1.1",
         "phunspell>=0.1.6",
         "spylls>=0.1.5",
-        "symspellpy>=6.7.0"
+        "symspellpy>=6.7.0",
+        "tltk>=1.3.8",
     ],
 }
diff --git a/tests/test_spell.py b/tests/test_spell.py
index 2183f5594..bb273709a 100644
--- a/tests/test_spell.py
+++ b/tests/test_spell.py
@@ -7,7 +7,8 @@
     correct,
     spell,
     spell_sent,
-    correct_sent
+    correct_sent,
+    symspellpy,
 )
 
 
@@ -40,6 +41,14 @@ def test_spell(self):
         self.assertIsInstance(result, list)
         self.assertGreater(len(result), 0)
 
+        result = spell("เน้ร", engine="tltk")
+        self.assertIsInstance(result, list)
+        self.assertGreater(len(result), 0)
+
+        result = spell("เดก", engine="tltk")
+        self.assertIsInstance(result, list)
+        self.assertGreater(len(result), 0)
+
     def test_word_correct(self):
         self.assertEqual(correct(None), "")
         self.assertEqual(correct(""), "")
@@ -123,3 +132,6 @@ def test_correct_sent(self):
         self.assertIsNotNone(
             correct_sent(self.spell_sent, engine="symspellpy")
         )
+        self.assertIsNotNone(
+            symspellpy.correct_sent(self.spell_sent)
+        )
diff --git a/tests/test_summarize.py b/tests/test_summarize.py
index f5cb0161f..2c36ebced 100644
--- a/tests/test_summarize.py
+++ b/tests/test_summarize.py
@@ -24,3 +24,5 @@ def test_summarize(self):
         self.assertIsNotNone(summarize([]))
         self.assertIsNotNone(summarize(text, 1, engine="mt5-small"))
         self.assertIsNotNone(summarize(text, 1, engine="XX"))
+        with self.assertRaises(ValueError):
+            self.assertIsNotNone(summarize(text, 1, engine="mt5-cat"))
diff --git a/tests/test_tag.py b/tests/test_tag.py
index 854c559e8..68232e30c 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -2,6 +2,7 @@
 import unittest
 from os import path
 
+from pythainlp import tag
 from pythainlp.tag import (
     chunk_parse,
@@ -10,6 +11,7 @@
     pos_tag,
     pos_tag_sents,
     unigram,
+    tltk,
 )
 from pythainlp.tag.locations import tag_provinces
 from pythainlp.tag.named_entity import ThaiNameTagger
@@ -100,7 +102,7 @@ def test_pos_tag(self):
             pos_tag(tokens, engine="wangchanberta", corpus="lst20")
         )
         self.assertIsNotNone(
engine="wangchanberta", corpus="lst20_ud") + pos_tag(tokens, engine="tltk") ) self.assertEqual(pos_tag_sents(None), []) @@ -112,6 +114,14 @@ def test_pos_tag(self): [("แมว", "NCMN"), ("วิ่ง", "VACT")], ], ) + with self.assertRaises(ValueError): + self.assertIsNotNone( + pos_tag(tokens, engine="wangchanberta", corpus="lst20_ud") + ) + with self.assertRaises(ValueError): + self.assertIsNotNone( + tltk.pos_tag(tokens, corpus="lst20") + ) # ### pythainlp.tag.PerceptronTagger @@ -355,3 +365,32 @@ def test_ner(self): # ("เช้า", "I-TIME"), # ], # ) + + def test_tltk_ner(self): + self.assertEqual(tltk.get_ner(""), []) + self.assertIsNotNone(tltk.get_ner("แมวทำอะไรตอนห้าโมงเช้า")) + self.assertIsNotNone(tltk.get_ner("แมวทำอะไรตอนห้าโมงเช้า", pos=False)) + self.assertIsNotNone( + tltk.get_ner( + "พลเอกประยุกธ์ จันทร์โอชา ประกาศในฐานะหัวหน้า" + ) + ) + self.assertIsNotNone( + tltk.get_ner( + "พลเอกประยุกธ์ จันทร์โอชา ประกาศในฐานะหัวหน้า", + tag=True, + ) + ) + self.assertIsNotNone( + tltk.get_ner( + """คณะวิทยาศาสตร์ประยุกต์และวิศวกรรมศาสตร์ มหาวิทยาลัยขอนแก่น + จังหวัดหนองคาย 43000""" + ) + ) + self.assertIsNotNone( + tltk.get_ner( + """คณะวิทยาศาสตร์ประยุกต์และวิศวกรรมศาสตร์ มหาวิทยาลัยขอนแก่น + จังหวัดหนองคาย 43000""", + tag=True, + ) + ) diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index cc90634fc..6d1f54bd0 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -21,6 +21,7 @@ tcc, word_tokenize, sefr_cut, + tltk, ) from pythainlp.tokenize import clause_tokenize as sent_clause_tokenize from pythainlp.util import dict_trie @@ -260,6 +261,15 @@ def test_sent_tokenize(self): self.assertIsNotNone( sent_tokenize(sent_1, keep_whitespace=False, engine="whitespace",), ) + self.assertIsNotNone( + sent_tokenize(sent_1, engine="tltk",), + ) + self.assertIsNotNone( + sent_tokenize(sent_2, engine="tltk",), + ) + self.assertIsNotNone( + sent_tokenize(sent_3, engine="tltk",), + ) self.assertFalse( " " in sent_tokenize( @@ -319,6 +329,17 @@ def test_subword_tokenize(self): self.assertFalse( " " in subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False) ) + self.assertEqual(subword_tokenize(None, engine="tltk"), []) + self.assertEqual(subword_tokenize("", engine="tltk"), []) + self.assertIsInstance( + subword_tokenize("สวัสดิีดาวอังคาร", engine="tltk"), list + ) + self.assertFalse( + "า" in subword_tokenize("สวัสดีดาวอังคาร", engine="tltk") + ) + self.assertIsInstance( + subword_tokenize("โควิด19", engine="tltk"), list + ) with self.assertRaises(ValueError): subword_tokenize("นกแก้ว", engine="XX") # engine does not exist @@ -360,6 +381,7 @@ def test_word_tokenize(self): self.assertIsNotNone(word_tokenize(self.text_1, engine="nercut")) self.assertIsNotNone(word_tokenize(self.text_1, engine="newmm")) self.assertIsNotNone(word_tokenize(self.text_1, engine="sefr_cut")) + self.assertIsNotNone(word_tokenize(self.text_1, engine="tltk")) with self.assertRaises(ValueError): word_tokenize("หมอนทอง", engine="XX") # engine does not exist @@ -423,6 +445,29 @@ def test_icu(self): ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"], ) + def test_tltk(self): + self.assertEqual(tltk.segment(None), []) + self.assertEqual(tltk.segment(""), []) + self.assertEqual( + tltk.syllable_tokenize( + "ฉันรักภาษาไทยเพราะฉันเป็นคนไทย" + ), + [ + 'ฉัน', + 'รัก', + 'ภา', + 'ษา', + 'ไทย', + 'เพราะ', + 'ฉัน', + 'เป็น', + 'คน', + 'ไทย' + ], + ) + self.assertEqual(tltk.syllable_tokenize(None), []) + self.assertEqual(tltk.syllable_tokenize(""), []) + def test_longest(self): 
self.assertEqual(longest.segment(None), []) self.assertEqual(longest.segment(""), []) diff --git a/tests/test_transliterate.py b/tests/test_transliterate.py index 2d1ca7a91..4a99b1676 100644 --- a/tests/test_transliterate.py +++ b/tests/test_transliterate.py @@ -57,6 +57,7 @@ def test_romanize(self): self.assertEqual(romanize(None), "") self.assertEqual(romanize(""), "") self.assertEqual(romanize("แมว"), "maeo") + self.assertEqual(romanize("แมว", engine="tltk"), "maeo") def test_romanize_royin_basic(self): for word in _BASIC_TESTS: @@ -136,6 +137,10 @@ def test_transliterate(self): self.assertEqual(transliterate("คน", engine="ipa"), "kʰon") self.assertIsNotNone(transliterate("คน", engine="thaig2p")) self.assertIsNotNone(transliterate("แมว", engine="thaig2p")) + self.assertIsNotNone(transliterate("คน", engine="tltk_g2p")) + self.assertIsNotNone(transliterate("แมว", engine="tltk_g2p")) + self.assertIsNotNone(transliterate("คน", engine="tltk_ipa")) + self.assertIsNotNone(transliterate("แมว", engine="tltk_ipa")) self.assertIsNotNone(trans_list("คน")) self.assertIsNotNone(xsampa_list("คน"))
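
Usage sketch: a minimal example of the tltk-backed engines this patch wires into the public tokenizer, transliteration, and spelling APIs. It assumes PyThaiNLP is installed with the new ``tltk`` extra from setup.py (``pip install pythainlp[tltk]``); the tokens and transcriptions shown in comments are illustrative only, since actual results depend on the tltk models.

```python
from pythainlp.spell import spell
from pythainlp.tokenize import sent_tokenize, subword_tokenize, word_tokenize
from pythainlp.transliterate import romanize, transliterate

text = "ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"

# Word, sentence, and syllable segmentation backed by tltk.nlp
print(word_tokenize(text, engine="tltk"))
print(sent_tokenize(text, engine="tltk"))
print(subword_tokenize(text, engine="tltk"))

# Romanization and grapheme-to-phoneme conversion via tltk
print(romanize("สามารถ", engine="tltk"))           # e.g. 'samat'
print(transliterate("สามารถ", engine="tltk_g2p"))  # e.g. 'saa4~maat2'
print(transliterate("สามารถ", engine="tltk_ipa"))  # e.g. 'saː5.maːt3'

# Spelling suggestions from tltk.nlp.spell_candidates
print(spell("เส้นตรบ", engine="tltk"))             # e.g. ['เส้นตรง']
```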
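A similar sketch for the new tagging entry points, again with illustrative outputs only: ``pos_tag`` forces the corpus to ``"tnc"`` when ``engine="tltk"`` is selected and now raises ``ValueError`` for engine/corpus pairs it does not know, while ``pythainlp.tag.tltk.get_ner`` can return tuples with or without POS tags or, with ``tag=True``, an HTML-like tagged string.

```python
from pythainlp.tag import pos_tag, tltk
from pythainlp.tokenize import word_tokenize

words = word_tokenize("เขาเรียนที่โรงเรียนนางรอง", engine="tltk")

# POS tagging; the corpus argument is overridden to "tnc" for the tltk engine
print(pos_tag(words, engine="tltk"))

# An unsupported engine/corpus combination now raises ValueError
try:
    pos_tag(words, engine="perceptron", corpus="tnc")
except ValueError as err:
    print(err)

# Named-entity recognition from pythainlp.tag.tltk
print(tltk.get_ner("เขาเรียนที่โรงเรียนนางรอง"))             # (word, POS, NER) tuples
print(tltk.get_ner("เขาเรียนที่โรงเรียนนางรอง", pos=False))  # (word, NER) tuples
print(tltk.get_ner("เขาเรียนที่โรงเรียนนางรอง", tag=True))   # HTML-like tag string
```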