From 04ff6039bdaba8be3afe1cfc01a59ec337530ab9 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sat, 12 Sep 2020 21:24:10 +0100 Subject: [PATCH] Add family names --- docs/api/corpus.rst | 1 + pythainlp/corpus/__init__.py | 2 ++ pythainlp/corpus/common.py | 22 +++++++++++++++++++++- tests/test_corpus.py | 3 +++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/docs/api/corpus.rst b/docs/api/corpus.rst index 0a727e3b5..dbe298ec1 100644 --- a/docs/api/corpus.rst +++ b/docs/api/corpus.rst @@ -19,6 +19,7 @@ Modules .. autofunction:: thai_words .. autofunction:: thai_syllables .. autofunction:: thai_negations +.. autofunction:: thai_family_names .. autofunction:: thai_female_names .. autofunction:: thai_male_names .. autofunction:: pythainlp.corpus.conceptnet.edges diff --git a/pythainlp/corpus/__init__.py b/pythainlp/corpus/__init__.py index d89c4f3cc..509adb481 100644 --- a/pythainlp/corpus/__init__.py +++ b/pythainlp/corpus/__init__.py @@ -18,6 +18,7 @@ "get_corpus_path", "provinces", "remove", + "thai_family_names", "thai_female_names", "thai_male_names", "thai_negations", @@ -86,6 +87,7 @@ def corpus_db_path() -> str: from pythainlp.corpus.common import ( countries, provinces, + thai_family_names, thai_female_names, thai_male_names, thai_negations, diff --git a/pythainlp/corpus/common.py b/pythainlp/corpus/common.py index e56565c11..b85f009b5 100644 --- a/pythainlp/corpus/common.py +++ b/pythainlp/corpus/common.py @@ -6,6 +6,7 @@ __all__ = [ "countries", "provinces", + "thai_family_names", "thai_female_names", "thai_male_names", "thai_negations", @@ -14,9 +15,10 @@ "thai_words", ] -from pythainlp.corpus import get_corpus from typing import Union +from pythainlp.corpus import get_corpus + _THAI_COUNTRIES = set() _THAI_COUNTRIES_FILENAME = "countries_th.txt" @@ -36,6 +38,8 @@ _THAI_NEGATIONS = set() _THAI_NEGATIONS_FILENAME = "negations_th.txt" +_THAI_FAMLIY_NAMES = set() +_THAI_FAMLIY_NAMES_FILENAME = "family_names_th.txt" _THAI_FEMALE_NAMES = set() _THAI_FEMALE_NAMES_FILENAME = "person_names_female_th.txt" _THAI_MALE_NAMES = set() @@ -167,6 +171,22 @@ def thai_negations() -> frozenset: return _THAI_NEGATIONS +def thai_family_names() -> frozenset: + """ + Return a frozenset of Thai family names + \n(See: `dev/pythainlp/corpus/family_names_th.txt\ + `_) + + :return: :class:`frozenset` containing Thai family names. + :rtype: :class:`frozenset` + """ + global _THAI_FAMLIY_NAMES + if not _THAI_FAMLIY_NAMES: + _THAI_FAMLIY_NAMES = get_corpus(_THAI_FAMLIY_NAMES_FILENAME) + + return _THAI_FAMLIY_NAMES + + def thai_female_names() -> frozenset: """ Return a frozenset of Thai female names diff --git a/tests/test_corpus.py b/tests/test_corpus.py index 429bbf180..b39a778ba 100644 --- a/tests/test_corpus.py +++ b/tests/test_corpus.py @@ -12,6 +12,7 @@ get_corpus_path, provinces, remove, + thai_family_names, thai_female_names, thai_male_names, thai_negations, @@ -41,6 +42,8 @@ def test_corpus(self): self.assertEqual( len(provinces(details=False)), len(provinces(details=True)) ) + self.assertIsInstance(thai_family_names(), frozenset) + self.assertIsInstance(list(thai_family_names())[0], str) self.assertIsInstance(thai_female_names(), frozenset) self.assertIsInstance(thai_male_names(), frozenset)