Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api/transliterate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Modules
.. autofunction:: romanize
.. autofunction:: transliterate
.. autofunction:: pronunciate
.. autofunction:: puan

Romanize Engines
----------------
Expand Down
4 changes: 3 additions & 1 deletion pythainlp/transliterate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
__all__ = [
"romanize",
"transliterate",
"pronunciate"
"pronunciate",
"puan"
]

from pythainlp.transliterate.core import romanize, transliterate, pronunciate
from pythainlp.transliterate.spoonerism import puan
73 changes: 73 additions & 0 deletions pythainlp/transliterate/spoonerism.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
from pythainlp.transliterate import pronunciate
from pythainlp import thai_consonants

# Thai consonants with "ห" removed; puan() tests for "ห" separately when it
# looks for the consonant to swap in a syllable.
_list_consonants = list(thai_consonants.replace("ห", ""))


def puan(word: str, show_pronunciation: bool = True) -> str:
    """
    Thai Spoonerism

    Convert a Thai word to its spoonerism form.
    The word is first turned into hyphen-separated syllables with
    ``pronunciate(word, engine="w2p")``. The first consonant found in each
    of the last two syllables is then exchanged and the syllables swap
    places. It only supports words with 2 to 3 syllables; for 3 syllables
    the first syllable is kept unchanged.

    :param str word: Thai word to be spoonerized
    :param bool show_pronunciation: if True (default), syllables in the
        result are joined with "-"; if False, they are concatenated and
        any "หฺ" marker is removed

    :return: A string of the Thai spoonerism word. If the pronunciation
        has only one syllable, that pronunciation is returned as is.
    :rtype: str

    :raises ValueError: if 2 or 3 (syllable, consonant) pairs cannot be
        formed, e.g. the word has more than 3 syllables

    :Example:
    ::

        from pythainlp.transliterate import puan

        puan("นาริน")
        # output: 'นิน-รา'

        puan("นาริน", False)
        # output: 'นินรา'
    """
    pronunciation = pronunciate(word, engine="w2p")
    syllables = pronunciation.split("-")
    if len(syllables) == 1:
        # Nothing to swap with a single syllable.
        return pronunciation

    separator = "-" if show_pronunciation else ""

    # Pick the first swappable consonant of every syllable.
    consonants = []
    for syllable in syllables:
        for char in syllable:
            # "ห" is excluded from _list_consonants, so it only counts as the
            # consonant of a two-character syllable that has no "หฺ" marker.
            # Compare by value (==): identity checks (`is`) against string
            # and int literals are unreliable and made this branch dead.
            if char in _list_consonants or (
                char == "ห" and "หฺ" not in syllable and len(syllable) == 2
            ):
                consonants.append(char)
                break

    # zip() stops at the shorter list, so a syllable without a detected
    # consonant reduces the number of pairs.
    pairs = list(zip(syllables, consonants))
    if len(pairs) == 2:
        (first, first_c), (second, second_c) = pairs
        swapped = [
            second.replace(second_c, first_c, 1),
            first.replace(first_c, second_c, 1),
        ]
    elif len(pairs) == 3:
        _, (middle, middle_c), (last, last_c) = pairs
        swapped = [
            syllables[0],
            last.replace(last_c, middle_c, 1),
            middle.replace(middle_c, last_c, 1),
        ]
    else:  # > 3 syllables (or too few usable syllable/consonant pairs)
        raise ValueError(
            "{0} is more than 3 syllables.\n\n"
            "It only supports words with 2 to 3 syllables.".format(
                pronunciation
            )
        )

    if not show_pronunciation:
        swapped = [syllable.replace("หฺ", "") for syllable in swapped]
    return separator.join(swapped)
10 changes: 9 additions & 1 deletion tests/test_transliterate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import unittest

import torch
from pythainlp.transliterate import romanize, transliterate, pronunciate
from pythainlp.transliterate import romanize, transliterate, pronunciate, puan
from pythainlp.transliterate.ipa import trans_list, xsampa_list
from pythainlp.transliterate.thai2rom import ThaiTransliterator
from pythainlp.corpus import remove
Expand Down Expand Up @@ -152,3 +152,11 @@ def test_pronunciate(self):
self.assertIsNotNone(pronunciate("มข.", engine="w2p"))
self.assertIsNotNone(pronunciate("มช.", engine="w2p"))
self.assertIsNotNone(pronunciate("jks", engine="w2p"))

def test_puan(self):
    """Check puan() on 2- and 3-syllable words and on a rejected word."""
    # Two syllables, with and without the "-" separator.
    self.assertEqual(puan("นาริน"), "นิน-รา")
    self.assertEqual(puan("นาริน", False), "นินรา")
    # Three syllables: the first syllable stays in place.
    self.assertEqual(puan("แสงดีนะ"), "แสง-ดะ-นี")
    self.assertEqual(puan("แสงดีนะ", False), "แสงดะนี")
    # Only the call belongs inside assertRaises; wrapping it in assertEqual
    # added a comparison that could never run once the exception is raised.
    with self.assertRaises(ValueError):
        puan("สวัสดีครับ")