Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api/corpus.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Modules
.. autofunction:: thai_words
.. autofunction:: thai_wsd_dict
.. autofunction:: thai_orst_words
.. autofunction:: thai_synonym
.. autofunction:: thai_syllables
.. autofunction:: thai_negations
.. autofunction:: thai_family_names
Expand Down
2 changes: 2 additions & 0 deletions pythainlp/corpus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"thai_female_names",
"thai_male_names",
"thai_negations",
"thai_synonym",
"thai_stopwords",
"thai_syllables",
"thai_words",
Expand Down Expand Up @@ -110,6 +111,7 @@ def corpus_db_path() -> str:
thai_female_names,
thai_male_names,
thai_negations,
thai_synonym,
thai_stopwords,
thai_syllables,
thai_words,
Expand Down
31 changes: 28 additions & 3 deletions pythainlp/corpus/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"thai_words",
"thai_dict",
"thai_wsd_dict",
"thai_synonym",
]

from typing import FrozenSet, List, Union
Expand Down Expand Up @@ -64,6 +65,7 @@

_THAI_DICT = {}
_THAI_WSD_DICT = {}
_THAI_SYNONYM = None


def countries() -> FrozenSet[str]:
Expand Down Expand Up @@ -270,7 +272,7 @@ def thai_dict() -> dict:
<https://pythainlp.github.io/pythainlp-corpus/thai_dict.html>`_)

:return: Thai word with part-of-speech type and definition
:rtype: :class:`frozenset`
:rtype: dict
"""
global _THAI_DICT
if _THAI_DICT == {}:
Expand All @@ -292,7 +294,7 @@ def thai_wsd_dict() -> dict:
<https://pythainlp.github.io/pythainlp-corpus/thai_dict.html>`_)

:return: Thai word with part-of-speech type and definition
:rtype: :class:`frozenset`
:rtype: dict
"""
global _THAI_WSD_DICT
if _THAI_WSD_DICT == {}:
Expand All @@ -308,4 +310,27 @@ def thai_wsd_dict() -> dict:
_THAI_WSD_DICT["word"].append(i)
_THAI_WSD_DICT["meaning"].append(_use)

return _THAI_WSD_DICT
return _THAI_WSD_DICT


def thai_synonym() -> dict:
    """
    Return Thai synonyms.
    \n(See: `thai_synonym\
    <https://pythainlp.github.io/pythainlp-corpus/thai_synonym.html>`_)

    The corpus CSV is parsed on the first call and the result is cached in
    the module-level ``_THAI_SYNONYM``; later calls return the same dict
    object.

    :return: dict with three parallel lists under the keys ``"word"``,
             ``"pos"`` and ``"synonym"``. Entry *i* of ``"synonym"`` is the
             list obtained by splitting row *i*'s ``synonym`` column on
             ``"|"``.
    :rtype: dict
    """
    global _THAI_SYNONYM
    if _THAI_SYNONYM is None:
        import csv

        words = []
        pos_tags = []
        synonyms = []
        # newline="" lets the csv module do its own newline handling, as
        # its documentation requires for files passed to csv readers.
        with open(
            get_corpus_path("thai_synonym"), newline="", encoding="utf-8"
        ) as csvfile:
            for row in csv.DictReader(csvfile, delimiter=","):
                words.append(row["word"])
                pos_tags.append(row["pos"])
                synonyms.append(row["synonym"].split("|"))

        # Publish the cache only after the whole file was read, so a failed
        # load is retried on the next call instead of leaving an empty or
        # partially filled dict behind.
        _THAI_SYNONYM = {"word": words, "pos": pos_tags, "synonym": synonyms}

    return _THAI_SYNONYM
2 changes: 2 additions & 0 deletions tests/test_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
thai_negations,
thai_stopwords,
thai_syllables,
thai_synonym,
thai_words,
tnc,
ttc,
Expand All @@ -44,6 +45,7 @@ def test_corpus(self):
self.assertIsInstance(countries(), frozenset)
self.assertIsInstance(provinces(), frozenset)
self.assertIsInstance(provinces(details=True), list)
self.assertIsInstance(thai_synonym(), dict)
self.assertEqual(
len(provinces(details=False)), len(provinces(details=True))
)
Expand Down