Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/api/corpus.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ countries
.. autofunction:: countries
:noindex:

find_synonyms
~~~~~~~~~~~~~
.. autofunction:: find_synonyms
:noindex:

get_corpus
~~~~~~~~~~
.. autofunction:: get_corpus
Expand Down
2 changes: 2 additions & 0 deletions pythainlp/corpus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"corpus_path",
"countries",
"download",
"find_synonyms",
"get_corpus",
"get_corpus_as_is",
"get_corpus_db",
Expand Down Expand Up @@ -101,6 +102,7 @@ def corpus_db_path() -> str:
) # these imports must come before other pythainlp.corpus.* imports
from pythainlp.corpus.common import (
countries,
find_synonyms,
provinces,
thai_dict,
thai_family_names,
Expand Down
37 changes: 37 additions & 0 deletions pythainlp/corpus/common.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0

"""
Common lists of words.
"""

__all__ = [
"countries",
"find_synonyms",
"provinces",
"thai_family_names",
"thai_female_names",
Expand Down Expand Up @@ -336,3 +338,38 @@ def thai_synonyms() -> dict:
def thai_synonym() -> dict:
    """
    Deprecated alias of ``thai_synonyms()``.

    Emits a ``DeprecationWarning`` and then returns whatever
    ``thai_synonyms()`` returns.

    :return: the result of ``thai_synonyms()``
    :rtype: dict
    """
    # stacklevel=2 attributes the warning to the caller's line instead of
    # this wrapper, so the deprecated call site can actually be located.
    warnings.warn(
        "Deprecated: Use thai_synonyms() instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return thai_synonyms()


def find_synonyms(word: str) -> List[str]:
    """
    Find synonyms of a Thai word.

    The lookup works in both directions: every entry whose headword is
    ``word`` contributes its synonyms, and every entry that lists ``word``
    among its synonyms contributes its headword and its other synonyms.

    :param str word: Thai word
    :return: Sorted list of unique synonyms of the input word (the word
             itself is excluded), or an empty list if none is found.
    :rtype: List[str]

    :Example:
    ::

        from pythainlp.corpus import find_synonyms

        print(find_synonyms("หมู"))
        # output: ['จรุก', 'วราหะ', 'วราห์', 'ศูกร', 'สุกร']
    """
    synonyms = thai_synonyms()  # parallel lists under "word" and "synonym"
    found = set()

    # Scan every entry once. Matching on the headword inside the same loop
    # (rather than via list.index, which stops at the first hit) ensures
    # that all entries sharing the same headword are taken into account.
    for headword, entry_synonyms in zip(
        synonyms["word"], synonyms["synonym"]
    ):
        if headword == word or word in entry_synonyms:
            found.add(headword)
            found.update(entry_synonyms)

    found.discard(word)  # a word is not its own synonym

    return sorted(found)
8 changes: 8 additions & 0 deletions tests/test_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
conceptnet,
countries,
download,
find_synonyms,
get_corpus_db,
get_corpus_db_detail,
get_corpus_default_db,
Expand Down Expand Up @@ -204,3 +205,10 @@ def test_zip(self):
p = get_corpus_path("test_zip")
self.assertEqual(os.path.isdir(p), True)
self.assertEqual(remove("test_zip"), True)

def test_find_synonyms(self):
    """Check find_synonyms() for a known word and for an input with no match."""
    expected_for_pig = ['จรุก', 'วราหะ', 'วราห์', 'ศูกร', 'สุกร']
    self.assertEqual(find_synonyms("หมู"), expected_for_pig)

    # An input that has no synonyms yields an empty list.
    no_match = find_synonyms("1")
    self.assertEqual(no_match, [])