Skip to content

Commit 7ddeb04

Browse files
committed
Add pythainlp.util.rhyme
1 parent 73b17e3 commit 7ddeb04

File tree

4 files changed

+53
-0
lines changed

4 files changed

+53
-0
lines changed

docs/api/util.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Modules
3838
.. autofunction:: remove_tonemark
3939
.. autofunction:: remove_zw
4040
.. autofunction:: reorder_vowels
41+
.. autofunction:: rhyme
4142
.. autofunction:: sound_syllable
4243
.. autofunction:: syllable_length
4344
.. autofunction:: syllable_open_close_detector

pythainlp/util/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
"remove_tonemark",
4646
"remove_zw",
4747
"reorder_vowels",
48+
"rhyme",
4849
"text_to_arabic_digit",
4950
"text_to_thai_digit",
5051
"thai_digit_to_arabic_digit",
@@ -127,3 +128,4 @@
127128
from pythainlp.util.encoding import tis620_to_utf8
128129
import pythainlp.util.spell_words as spell_words
129130
from pythainlp.util.abbreviation import abbreviation_to_full_text
131+
from pythainlp.util.pronounce import rhyme

pythainlp/util/pronounce.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# -*- coding: utf-8 -*-
2+
# Copyright (C) 2016-2023 PyThaiNLP Project
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
from typing import List
16+
17+
from pythainlp.corpus import thai_words
18+
from pythainlp.tokenize import syllable_tokenize
19+
from pythainlp.khavee import KhaveeVerifier
20+
21+
22+
kv = KhaveeVerifier()
# Cache of single-syllable dictionary words. It is left as None at import
# time and filled on the first rhyme() call, so that importing pythainlp.util
# does not have to syllable-tokenize the whole word list up front.
all_thai_words_dict = None


def _single_syllable_words() -> List[str]:
    """
    Return the cached single-syllable word list, building it on first use.

    A word is kept when ``syllable_tokenize`` splits it into exactly one
    token. The result is stored in the module-level ``all_thai_words_dict``
    so the tokenization pass runs at most once per process.
    """
    global all_thai_words_dict
    if all_thai_words_dict is None:
        all_thai_words_dict = [
            w for w in thai_words() if len(syllable_tokenize(w)) == 1
        ]
    return all_thai_words_dict


def rhyme(word: str) -> List[str]:
    """
    Find Thai words that rhyme with the given word.

    Candidates are the single-syllable entries of the ``thai_words`` corpus;
    a candidate is returned when ``KhaveeVerifier.is_sumpus(word, candidate)``
    is true. The input word itself is never included in the result.

    :param str word: A Thai word
    :return: A sorted list of the rhyming words
    :rtype: List[str]

    :Example:
    ::
        from pythainlp.util import rhyme

        print(rhyme("จีบ"))
        # output: ['กลีบ', 'กีบ', 'ครีบ', ...]
    """
    return sorted(
        candidate
        for candidate in _single_syllable_words()
        if kv.is_sumpus(word, candidate) and candidate != word
    )

tests/test_util.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
remove_dup_spaces,
3737
remove_tonemark,
3838
remove_zw,
39+
rhyme,
3940
text_to_arabic_digit,
4041
text_to_thai_digit,
4142
thaiword_to_date,
@@ -853,5 +854,9 @@ def test_spell_word(self):
853854
self.assertEqual(spell_word("คน"),['คอ', 'นอ', 'คน'])
854855
self.assertEqual(spell_word("คนดี"),['คอ', 'นอ', 'คน', 'ดอ', 'อี', 'ดี', 'คนดี'])
855856

857+
def test_rhyme(self):
    # rhyme() should return a list holding more than two entries for "แมว".
    rhymes = rhyme("แมว")
    self.assertIsInstance(rhymes, list)
    self.assertGreater(len(rhymes), 2)
860+
856861
# def test_abbreviation_to_full_text(self):
857862
# self.assertIsInstance(abbreviation_to_full_text("รร.ของเราน่าอยู่", list))

0 commit comments

Comments
 (0)