diff --git a/docs/api/util.rst b/docs/api/util.rst index d3773a9c1..41b635d93 100644 --- a/docs/api/util.rst +++ b/docs/api/util.rst @@ -162,6 +162,12 @@ Modules The `reorder_vowels` function is a text processing utility for reordering vowel characters in Thai text. It is essential for phonetic analysis and pronunciation guides. +.. autofunction:: rhyme + :noindex: + + + The `rhyme` function is a utility for find rhyme of Thai word. + .. autofunction:: sound_syllable :noindex: diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py index 59c424126..2b2ff40e4 100644 --- a/pythainlp/util/__init__.py +++ b/pythainlp/util/__init__.py @@ -45,6 +45,7 @@ "remove_tonemark", "remove_zw", "reorder_vowels", + "rhyme", "text_to_arabic_digit", "text_to_thai_digit", "thai_digit_to_arabic_digit", @@ -126,3 +127,4 @@ from pythainlp.util.encoding import tis620_to_utf8 from pythainlp.util import spell_words from pythainlp.util.abbreviation import abbreviation_to_full_text +from pythainlp.util.pronounce import rhyme diff --git a/pythainlp/util/pronounce.py b/pythainlp/util/pronounce.py new file mode 100644 index 000000000..d1021d671 --- /dev/null +++ b/pythainlp/util/pronounce.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2016-2023 PyThaiNLP Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List + +from pythainlp.corpus import thai_words +from pythainlp.tokenize import syllable_tokenize +from pythainlp.khavee import KhaveeVerifier + + +kv = KhaveeVerifier() +all_thai_words_dict = [ + i for i in list(thai_words()) if len(syllable_tokenize(i)) == 1 +] + + +def rhyme(word: str) -> List[str]: + """ + Find Thai rhyme + + :param str word: A Thai word + :return: All list Thai rhyme words + :rtype: List[str] + + :Example: + :: + from pythainlp.util import rhyme + + print(rhyme("จีบ")) + # output: ['กลีบ', 'กีบ', 'ครีบ', ...] + """ + list_sumpus = [] + for i in all_thai_words_dict: + if kv.is_sumpus(word, i) and i != word: + list_sumpus.append(i) + return sorted(list_sumpus) diff --git a/tests/test_util.py b/tests/test_util.py index 065f2082c..1840e2dc0 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -36,6 +36,7 @@ remove_dup_spaces, remove_tonemark, remove_zw, + rhyme, text_to_arabic_digit, text_to_thai_digit, thaiword_to_date, @@ -853,5 +854,9 @@ def test_spell_word(self): self.assertEqual(spell_word("คน"),['คอ', 'นอ', 'คน']) self.assertEqual(spell_word("คนดี"),['คอ', 'นอ', 'คน', 'ดอ', 'อี', 'ดี', 'คนดี']) + def test_rhyme(self): + self.assertIsInstance(rhyme("แมว"), list) + self.assertTrue(len(rhyme("แมว")) > 2) + # def test_abbreviation_to_full_text(self): # self.assertIsInstance(abbreviation_to_full_text("รร.ของเราน่าอยู่", list))