diff --git a/docs/api/corpus.rst b/docs/api/corpus.rst
index ff8617358..b68ffacc3 100644
--- a/docs/api/corpus.rst
+++ b/docs/api/corpus.rst
@@ -21,6 +21,7 @@ Modules
 .. autofunction:: thai_words
 .. autofunction:: thai_wsd_dict
 .. autofunction:: thai_orst_words
+.. autofunction:: thai_synonym
 .. autofunction:: thai_syllables
 .. autofunction:: thai_negations
 .. autofunction:: thai_family_names
diff --git a/pythainlp/corpus/__init__.py b/pythainlp/corpus/__init__.py
index ca6e047d4..3ca56e0d0 100644
--- a/pythainlp/corpus/__init__.py
+++ b/pythainlp/corpus/__init__.py
@@ -37,6 +37,7 @@
     "thai_female_names",
     "thai_male_names",
     "thai_negations",
+    "thai_synonym",
     "thai_stopwords",
     "thai_syllables",
     "thai_words",
@@ -110,6 +111,7 @@ def corpus_db_path() -> str:
     thai_female_names,
     thai_male_names,
     thai_negations,
+    thai_synonym,
     thai_stopwords,
     thai_syllables,
     thai_words,
diff --git a/pythainlp/corpus/common.py b/pythainlp/corpus/common.py
index 15007b5e3..411148358 100644
--- a/pythainlp/corpus/common.py
+++ b/pythainlp/corpus/common.py
@@ -28,6 +28,7 @@
     "thai_words",
     "thai_dict",
     "thai_wsd_dict",
+    "thai_synonym",
 ]
 
 from typing import FrozenSet, List, Union
@@ -64,6 +65,7 @@
 
 _THAI_DICT = {}
 _THAI_WSD_DICT = {}
+_THAI_SYNONYM = None
 
 
 def countries() -> FrozenSet[str]:
@@ -270,7 +272,7 @@ def thai_dict() -> dict:
     `_)
 
     :return: Thai word with part-of-speech type and definition
-    :rtype: :class:`frozenset`
+    :rtype: dict
     """
     global _THAI_DICT
     if _THAI_DICT == {}:
@@ -292,7 +294,7 @@ def thai_wsd_dict() -> dict:
     `_)
 
     :return: Thai word with part-of-speech type and definition
-    :rtype: :class:`frozenset`
+    :rtype: dict
     """
     global _THAI_WSD_DICT
     if _THAI_WSD_DICT == {}:
@@ -308,4 +310,37 @@ def thai_wsd_dict() -> dict:
                 _THAI_WSD_DICT["word"].append(i)
                 _THAI_WSD_DICT["meaning"].append(_use)
 
-    return _THAI_WSD_DICT
\ No newline at end of file
+    return _THAI_WSD_DICT
+
+
+def thai_synonym() -> dict:
+    """
+    Return Thai synonym.
+    \n(See: `thai_synonym\
+    `_)
+
+    The corpus file is read on the first call and cached in a module-level
+    variable; later calls return that same dict object.
+
+    :return: dict with the keys "word", "pos" (part-of-speech type) and
+        "synonym"; each maps to a list with one entry per corpus row, and
+        each "synonym" entry is itself a list of strings
+    :rtype: dict
+    """
+    global _THAI_SYNONYM
+    if _THAI_SYNONYM is None:
+        import csv
+
+        # Fill a local dict first so a failed read never caches partial data.
+        synonym_data = {"word": [], "pos": [], "synonym": []}
+        # The csv module expects files opened with newline="".
+        with open(
+            get_corpus_path("thai_synonym"), newline="", encoding="utf-8"
+        ) as csvfile:
+            for row in csv.DictReader(csvfile, delimiter=","):
+                synonym_data["word"].append(row["word"])
+                synonym_data["pos"].append(row["pos"])
+                synonym_data["synonym"].append(row["synonym"].split("|"))
+        _THAI_SYNONYM = synonym_data
+
+    return _THAI_SYNONYM
diff --git a/tests/test_corpus.py b/tests/test_corpus.py
index eb82a2928..bcead7626 100644
--- a/tests/test_corpus.py
+++ b/tests/test_corpus.py
@@ -20,6 +20,7 @@
     thai_negations,
     thai_stopwords,
     thai_syllables,
+    thai_synonym,
     thai_words,
     tnc,
     ttc,
@@ -44,6 +45,7 @@ def test_corpus(self):
         self.assertIsInstance(countries(), frozenset)
         self.assertIsInstance(provinces(), frozenset)
         self.assertIsInstance(provinces(details=True), list)
+        self.assertIsInstance(thai_synonym(), dict)
         self.assertEqual(
             len(provinces(details=False)), len(provinces(details=True))
         )