# -*- coding: utf-8 -*-
"""
Thai spoonerism built on top of the ``w2p`` pronunciation engine.
"""
from pythainlp.transliterate import pronunciate
from pythainlp import thai_consonants

# Every Thai consonant except "ห"; "ห" is only accepted as an initial
# consonant under the extra conditions checked in _initial_index().
_list_consonants = list(thai_consonants.replace("ห", ""))


def _initial_index(syllable: str) -> int:
    """
    Return the position of the initial consonant of a syllable.

    The first character that is a consonant other than "ห" wins. A "ห" is
    accepted only when the syllable is exactly two characters long and
    does not contain "หฺ".

    :param str syllable: one syllable of a pronunciation string
    :return: index of the initial consonant, or -1 if none is found
    :rtype: int
    """
    # NOTE(review): "หฺ" (ห + phinthu) presumably marks a silent leading ห
    # in the w2p output -- confirm against the w2p engine.
    ho_hip_is_initial = "หฺ" not in syllable and len(syllable) == 2
    for index, char in enumerate(syllable):
        if char in _list_consonants:
            return index
        # Compare by value: identity checks ("is") against str/int
        # literals depend on interning and are not reliable.
        if char == "ห" and ho_hip_is_initial:
            return index
    return -1


def _swap_pair(first: str, second: str) -> list:
    """
    Build the spoonerized pair for two adjacent syllables.

    The result's first item is ``second`` carrying the initial consonant
    of ``first``; the second item is ``first`` carrying the initial
    consonant of ``second``.

    :param str first: the earlier syllable
    :param str second: the later syllable
    :return: the two new syllables, in output order
    :rtype: list
    :raises ValueError: if either syllable has no initial consonant
    """
    first_at = _initial_index(first)
    second_at = _initial_index(second)
    if first_at < 0 or second_at < 0:
        # Without both initials there is nothing to exchange; failing
        # loudly is better than pairing consonants with wrong syllables.
        raise ValueError(
            "Cannot find the initial consonant of {0} or {1}.".format(
                first, second
            )
        )
    return [
        second[:second_at] + first[first_at] + second[second_at + 1:],
        first[:first_at] + second[second_at] + first[first_at + 1:],
    ]


def puan(word: str, show_pronunciation: bool = True) -> str:
    """
    Thai Spoonerism

    This function converts Thai word to spoonerism word.
    It only supports words with 2 to 3 syllables.

    The word is first converted to its pronunciation with the ``w2p``
    engine and split into syllables on "-". The last two syllables are
    then spoonerized; for a 3-syllable word the first syllable is kept
    unchanged. If the pronunciation has a single syllable, it is returned
    as is.

    :param str word: Thai word to be spoonerized
    :param bool show_pronunciation: True (default) joins the syllables
        with "-" and keeps the "หฺ" marker; False concatenates the
        syllables and removes every "หฺ"

    :return: A string of Thai spoonerism word.
    :rtype: str

    :raises ValueError: if the pronunciation has more than 3 syllables,
        or if a syllable to be swapped has no initial consonant

    :Example:
    ::

        from pythainlp.transliterate import puan

        puan("นาริน")
        # output: 'นิน-รา'

        puan("นาริน", False)
        # output: 'นินรา'
    """
    pronunciation = pronunciate(word, engine="w2p")
    syllables = pronunciation.split("-")

    if len(syllables) == 1:
        return pronunciation
    if len(syllables) > 3:
        raise ValueError(
            "{0} is more than 3 syllables. "
            "It only supports words with 2 to 3 syllables.".format(
                pronunciation
            )
        )

    # Two or three syllables: anything before the last two is kept as is.
    spoonerized = syllables[:-2] + _swap_pair(syllables[-2], syllables[-1])

    if show_pronunciation:
        return "-".join(spoonerized)
    return "".join(part.replace("หฺ", "") for part in spoonerized)
def test_puan(self):
    """Check puan() on 2- and 3-syllable words and on an over-long word."""
    # Two syllables, with and without the "-" separator.
    self.assertEqual(puan("นาริน"), "นิน-รา")
    self.assertEqual(puan("นาริน", False), "นินรา")

    # Three syllables: the first one is expected to stay in place.
    self.assertEqual(puan("แสงดีนะ"), "แสง-ดะ-นี")
    self.assertEqual(puan("แสงดีนะ", False), "แสงดะนี")

    # This word is expected to be rejected with ValueError. Only the call
    # belongs inside assertRaises: comparing a return value there is dead
    # code, since the call raises before anything can be compared.
    with self.assertRaises(ValueError):
        puan("สวัสดีครับ")