Skip to content

Commit 3dd4351

Browse files
authored
Merge pull request #825 from PyThaiNLP/add-thai_synonym
Add thai_synonym
2 parents d0fdb77 + 99cd482 commit 3dd4351

File tree

4 files changed

+33
-3
lines changed

4 files changed

+33
-3
lines changed

docs/api/corpus.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Modules
2121
.. autofunction:: thai_words
2222
.. autofunction:: thai_wsd_dict
2323
.. autofunction:: thai_orst_words
24+
.. autofunction:: thai_synonym
2425
.. autofunction:: thai_syllables
2526
.. autofunction:: thai_negations
2627
.. autofunction:: thai_family_names

pythainlp/corpus/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
"thai_female_names",
3838
"thai_male_names",
3939
"thai_negations",
40+
"thai_synonym",
4041
"thai_stopwords",
4142
"thai_syllables",
4243
"thai_words",
@@ -110,6 +111,7 @@ def corpus_db_path() -> str:
110111
thai_female_names,
111112
thai_male_names,
112113
thai_negations,
114+
thai_synonym,
113115
thai_stopwords,
114116
thai_syllables,
115117
thai_words,

pythainlp/corpus/common.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
"thai_words",
2929
"thai_dict",
3030
"thai_wsd_dict",
31+
"thai_synonym",
3132
]
3233

3334
from typing import FrozenSet, List, Union
@@ -64,6 +65,7 @@
6465

6566
_THAI_DICT = {}
6667
_THAI_WSD_DICT = {}
68+
_THAI_SYNONYM = None
6769

6870

6971
def countries() -> FrozenSet[str]:
@@ -270,7 +272,7 @@ def thai_dict() -> dict:
270272
<https://pythainlp.github.io/pythainlp-corpus/thai_dict.html>`_)
271273
272274
:return: Thai word with part-of-speech type and definition
273-
:rtype: :class:`frozenset`
275+
:rtype: dict
274276
"""
275277
global _THAI_DICT
276278
if _THAI_DICT == {}:
@@ -292,7 +294,7 @@ def thai_wsd_dict() -> dict:
292294
<https://pythainlp.github.io/pythainlp-corpus/thai_dict.html>`_)
293295
294296
:return: Thai word with part-of-speech type and definition
295-
:rtype: :class:`frozenset`
297+
:rtype: dict
296298
"""
297299
global _THAI_WSD_DICT
298300
if _THAI_WSD_DICT == {}:
@@ -308,4 +310,27 @@ def thai_wsd_dict() -> dict:
308310
_THAI_WSD_DICT["word"].append(i)
309311
_THAI_WSD_DICT["meaning"].append(_use)
310312

311-
return _THAI_WSD_DICT
313+
return _THAI_WSD_DICT
314+
315+
316+
def thai_synonym() -> dict:
    """
    Return Thai synonyms.
    \n(See: `thai_synonym\
    <https://pythainlp.github.io/pythainlp-corpus/thai_synonym.html>`_)

    The corpus file is read once, on the first call; the parsed result is
    cached in the module-level ``_THAI_SYNONYM`` and the same dict object
    is returned by every subsequent call.

    :return: dict with three parallel lists under the keys ``"word"``,
             ``"pos"`` and ``"synonym"``. Entry *i* of each list comes
             from the same CSV row; each ``"synonym"`` entry is itself a
             list of strings obtained by splitting the row's ``synonym``
             column on ``"|"``.
    :rtype: dict
    """
    global _THAI_SYNONYM
    if _THAI_SYNONYM is None:
        import csv

        # Build into a local first so that a failure while opening or
        # parsing the file does not leave a half-filled cache behind;
        # the next call will simply retry.
        synonyms = {"word": [], "pos": [], "synonym": []}
        # newline="" is what the csv module asks for, so that it can do
        # its own line-ending handling (including newlines inside quotes).
        with open(
            get_corpus_path("thai_synonym"), newline="", encoding="utf-8"
        ) as csvfile:
            reader = csv.DictReader(csvfile, delimiter=",")
            for row in reader:
                synonyms["word"].append(row["word"])
                synonyms["pos"].append(row["pos"])
                synonyms["synonym"].append(row["synonym"].split("|"))
        _THAI_SYNONYM = synonyms

    return _THAI_SYNONYM

tests/test_corpus.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
thai_negations,
2121
thai_stopwords,
2222
thai_syllables,
23+
thai_synonym,
2324
thai_words,
2425
tnc,
2526
ttc,
@@ -44,6 +45,7 @@ def test_corpus(self):
4445
self.assertIsInstance(countries(), frozenset)
4546
self.assertIsInstance(provinces(), frozenset)
4647
self.assertIsInstance(provinces(details=True), list)
48+
self.assertIsInstance(thai_synonym(), dict)
4749
self.assertEqual(
4850
len(provinces(details=False)), len(provinces(details=True))
4951
)

0 commit comments

Comments
 (0)