From 4570e4ff5642b6fa7e9d6f5e113275bdec5acdaf Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Fri, 5 Nov 2021 22:43:47 +0700 Subject: [PATCH 1/8] Add pythainlp.transliterate.puan #620 Add pythainlp.transliterate.puan #620 --- docs/api/transliterate.rst | 1 + pythainlp/transliterate/__init__.py | 4 +- pythainlp/transliterate/spoonerism.py | 68 +++++++++++++++++++++++++++ tests/test_transliterate.py | 9 +++- 4 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 pythainlp/transliterate/spoonerism.py diff --git a/docs/api/transliterate.rst b/docs/api/transliterate.rst index ea6fc8615..e75f7b96a 100644 --- a/docs/api/transliterate.rst +++ b/docs/api/transliterate.rst @@ -10,6 +10,7 @@ Modules .. autofunction:: romanize .. autofunction:: transliterate .. autofunction:: pronunciate +.. autofunction:: puan Romanize Engines ---------------- diff --git a/pythainlp/transliterate/__init__.py b/pythainlp/transliterate/__init__.py index 31d40a549..4046a2fe5 100644 --- a/pythainlp/transliterate/__init__.py +++ b/pythainlp/transliterate/__init__.py @@ -6,7 +6,9 @@ __all__ = [ "romanize", "transliterate", - "pronunciate" + "pronunciate", + "puan" ] from pythainlp.transliterate.core import romanize, transliterate, pronunciate +from pythainlp.transliterate.spoonerism import puan diff --git a/pythainlp/transliterate/spoonerism.py b/pythainlp/transliterate/spoonerism.py new file mode 100644 index 000000000..2276ea166 --- /dev/null +++ b/pythainlp/transliterate/spoonerism.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +from pythainlp.transliterate import pronunciate +from pythainlp import thai_consonants + +_list_consonants = list(thai_consonants.replace("ห", "")) + + +def puan(word: str, show_pronunciation: bool = True) -> str: + """ + Thai Spoonerism + + This function covent Thai word to Thai spoonerism word. + This function only supports words with 2 to 3 syllables. + + :param str word: Thai word to be spoonerism + :param bool show_pronunciation: True (default) or False + + :return: A string of Thai spoonerism word. + :rtype: str + + :Example: + :: + + from pythainlp.transliterate import puan + + puan("นาริน") + # output: 'นิน-รา' + + puan("นาริน", False) + # output: 'นินรา' + """ + _word = pronunciate(word, engine="w2p") + _list_char = [] + _list_pron = _word.split('-') + _mix_list = "" + if len(_list_pron) == 1: + return word + if show_pronunciation: + _mix_list = "-" + for i in _list_pron: + for j in i: + if j in _list_consonants: + _list_char.append(j) + break + elif "ห" in j and "หฺ" not in j: + _list_char.append(j) + break + + list_w_char = list(zip(_list_pron, _list_char)) + _list_w = [] + if len(list_w_char) == 2: + _list_w.append( + list_w_char[1][0].replace(list_w_char[1][1], list_w_char[0][1], 1) + ) + _list_w.append( + list_w_char[0][0].replace(list_w_char[0][1], list_w_char[1][1], 1) + ) + elif len(list_w_char) == 3: + _list_w.append(_list_pron[0]) + _list_w.append( + list_w_char[2][0].replace(list_w_char[2][1], list_w_char[1][1], 1) + ) + _list_w.append(list_w_char[1][0].replace( + list_w_char[1][1], list_w_char[2][1], 1) + ) + else: # > 3 syllables? + return word + return _mix_list.join(_list_w) diff --git a/tests/test_transliterate.py b/tests/test_transliterate.py index 4a99b1676..8fa4d74c0 100644 --- a/tests/test_transliterate.py +++ b/tests/test_transliterate.py @@ -3,7 +3,7 @@ import unittest import torch -from pythainlp.transliterate import romanize, transliterate, pronunciate +from pythainlp.transliterate import romanize, transliterate, pronunciate, puan from pythainlp.transliterate.ipa import trans_list, xsampa_list from pythainlp.transliterate.thai2rom import ThaiTransliterator from pythainlp.corpus import remove @@ -152,3 +152,10 @@ def test_pronunciate(self): self.assertIsNotNone(pronunciate("มข.", engine="w2p")) self.assertIsNotNone(pronunciate("มช.", engine="w2p")) self.assertIsNotNone(pronunciate("jks", engine="w2p")) + + def test_puan(self): + self.assertEqual(puan("นาริน"), "นิน-รา") + self.assertEqual(puan("นาริน", False), "นินรา") + self.assertEqual(puan("สวัสดี"), "สะ-หี-ดวัด") + self.assertEqual(puan("สวัสดี", False), "สะหีดวัด") + self.assertEqual(puan("สวัสดีครับ"), "สวัสดีครับ") From 1f7a673a24ca53a0f45e7ca6ca8c5d34327db75e Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Fri, 5 Nov 2021 22:46:46 +0700 Subject: [PATCH 2/8] Fixed PEP8 --- pythainlp/transliterate/spoonerism.py | 2 +- tests/test_transliterate.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pythainlp/transliterate/spoonerism.py b/pythainlp/transliterate/spoonerism.py index 2276ea166..a511d74f0 100644 --- a/pythainlp/transliterate/spoonerism.py +++ b/pythainlp/transliterate/spoonerism.py @@ -63,6 +63,6 @@ def puan(word: str, show_pronunciation: bool = True) -> str: _list_w.append(list_w_char[1][0].replace( list_w_char[1][1], list_w_char[2][1], 1) ) - else: # > 3 syllables? + else: # > 3 syllables? return word return _mix_list.join(_list_w) diff --git a/tests/test_transliterate.py b/tests/test_transliterate.py index 8fa4d74c0..6f64f7ee4 100644 --- a/tests/test_transliterate.py +++ b/tests/test_transliterate.py @@ -152,7 +152,7 @@ def test_pronunciate(self): self.assertIsNotNone(pronunciate("มข.", engine="w2p")) self.assertIsNotNone(pronunciate("มช.", engine="w2p")) self.assertIsNotNone(pronunciate("jks", engine="w2p")) - + def test_puan(self): self.assertEqual(puan("นาริน"), "นิน-รา") self.assertEqual(puan("นาริน", False), "นินรา") From a1c9b64d0d0b21ae19628a41fb6026da50a3f447 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Fri, 5 Nov 2021 22:58:25 +0700 Subject: [PATCH 3/8] Update spoonerism.py --- pythainlp/transliterate/spoonerism.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pythainlp/transliterate/spoonerism.py b/pythainlp/transliterate/spoonerism.py index a511d74f0..252e3bddd 100644 --- a/pythainlp/transliterate/spoonerism.py +++ b/pythainlp/transliterate/spoonerism.py @@ -42,7 +42,7 @@ def puan(word: str, show_pronunciation: bool = True) -> str: if j in _list_consonants: _list_char.append(j) break - elif "ห" in j and "หฺ" not in j: + elif "ห" in j and "หฺ" not in i: _list_char.append(j) break @@ -65,4 +65,6 @@ def puan(word: str, show_pronunciation: bool = True) -> str: ) else: # > 3 syllables? return word + if not show_pronunciation: + _list_w = [i.replace("หฺ", "") for i in _list_w] return _mix_list.join(_list_w) From 75bc94768fc9ab443132920c0214a8abdc443896 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 6 Nov 2021 00:13:09 +0700 Subject: [PATCH 4/8] Update test_transliterate.py --- tests/test_transliterate.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_transliterate.py b/tests/test_transliterate.py index 6f64f7ee4..56f1119ad 100644 --- a/tests/test_transliterate.py +++ b/tests/test_transliterate.py @@ -156,6 +156,3 @@ def test_pronunciate(self): def test_puan(self): self.assertEqual(puan("นาริน"), "นิน-รา") self.assertEqual(puan("นาริน", False), "นินรา") - self.assertEqual(puan("สวัสดี"), "สะ-หี-ดวัด") - self.assertEqual(puan("สวัสดี", False), "สะหีดวัด") - self.assertEqual(puan("สวัสดีครับ"), "สวัสดีครับ") From 6584d2870d17dffa0f1014d0d39c96d3af84deaf Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 6 Nov 2021 00:27:25 +0700 Subject: [PATCH 5/8] Update Spoonerism --- pythainlp/transliterate/spoonerism.py | 17 ++++++++++------- tests/test_transliterate.py | 4 ++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/pythainlp/transliterate/spoonerism.py b/pythainlp/transliterate/spoonerism.py index 252e3bddd..89ca10161 100644 --- a/pythainlp/transliterate/spoonerism.py +++ b/pythainlp/transliterate/spoonerism.py @@ -9,8 +9,8 @@ def puan(word: str, show_pronunciation: bool = True) -> str: """ Thai Spoonerism - This function covent Thai word to Thai spoonerism word. - This function only supports words with 2 to 3 syllables. + This function converts Thai word to spoonerized. + It only supports words with 2 to 3 syllables. :param str word: Thai word to be spoonerism :param bool show_pronunciation: True (default) or False @@ -29,12 +29,12 @@ def puan(word: str, show_pronunciation: bool = True) -> str: puan("นาริน", False) # output: 'นินรา' """ - _word = pronunciate(word, engine="w2p") + word = pronunciate(word, engine="w2p") _list_char = [] - _list_pron = _word.split('-') + _list_pron = word.split('-') _mix_list = "" if len(_list_pron) == 1: - return word + return word[0] if show_pronunciation: _mix_list = "-" for i in _list_pron: @@ -63,8 +63,11 @@ def puan(word: str, show_pronunciation: bool = True) -> str: _list_w.append(list_w_char[1][0].replace( list_w_char[1][1], list_w_char[2][1], 1) ) - else: # > 3 syllables? - return word + else: # > 3 syllables + raise ValueError( + """{0} is more than 3 syllables.\n + It only supports words with 2 to 3 syllables.""".format(word) + ) if not show_pronunciation: _list_w = [i.replace("หฺ", "") for i in _list_w] return _mix_list.join(_list_w) diff --git a/tests/test_transliterate.py b/tests/test_transliterate.py index 56f1119ad..d27b8ac03 100644 --- a/tests/test_transliterate.py +++ b/tests/test_transliterate.py @@ -156,3 +156,7 @@ def test_pronunciate(self): def test_puan(self): self.assertEqual(puan("นาริน"), "นิน-รา") self.assertEqual(puan("นาริน", False), "นินรา") + self.assertEqual(puan("แสงดีนะ"), "แสง-ดะ-นี") + self.assertEqual(puan("แสงดีนะ", False), "แสงดะนี") + with self.assertRaises(ValueError): + self.assertEqual(puan("สวัสดีครับ"), "สวัสดีครับ") From e8583e6ad8c0a3d89fe4bcb063a776f1ad139447 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 6 Nov 2021 00:29:15 +0700 Subject: [PATCH 6/8] Update spoonerism.py --- pythainlp/transliterate/spoonerism.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythainlp/transliterate/spoonerism.py b/pythainlp/transliterate/spoonerism.py index 89ca10161..df8113efa 100644 --- a/pythainlp/transliterate/spoonerism.py +++ b/pythainlp/transliterate/spoonerism.py @@ -34,7 +34,7 @@ def puan(word: str, show_pronunciation: bool = True) -> str: _list_pron = word.split('-') _mix_list = "" if len(_list_pron) == 1: - return word[0] + return word if show_pronunciation: _mix_list = "-" for i in _list_pron: From 66f1552b635951b6b30ca01236a2bff54489c3ac Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 6 Nov 2021 01:11:37 +0700 Subject: [PATCH 7/8] Update spoonerism.py --- pythainlp/transliterate/spoonerism.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythainlp/transliterate/spoonerism.py b/pythainlp/transliterate/spoonerism.py index df8113efa..d6cd8554c 100644 --- a/pythainlp/transliterate/spoonerism.py +++ b/pythainlp/transliterate/spoonerism.py @@ -42,7 +42,7 @@ def puan(word: str, show_pronunciation: bool = True) -> str: if j in _list_consonants: _list_char.append(j) break - elif "ห" in j and "หฺ" not in i: + elif "ห" is j and "หฺ" not in i and len(i) is 2: _list_char.append(j) break From ca4c28d0a86561006949ea6c1e537269ee7606be Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 6 Nov 2021 01:30:54 +0700 Subject: [PATCH 8/8] Update spoonerism.py --- pythainlp/transliterate/spoonerism.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pythainlp/transliterate/spoonerism.py b/pythainlp/transliterate/spoonerism.py index d6cd8554c..0db62f98a 100644 --- a/pythainlp/transliterate/spoonerism.py +++ b/pythainlp/transliterate/spoonerism.py @@ -9,10 +9,10 @@ def puan(word: str, show_pronunciation: bool = True) -> str: """ Thai Spoonerism - This function converts Thai word to spoonerized. + This function converts Thai word to spoonerism word. It only supports words with 2 to 3 syllables. - :param str word: Thai word to be spoonerism + :param str word: Thai word to be spoonerized :param bool show_pronunciation: True (default) or False :return: A string of Thai spoonerism word.