From 2bd4eef60339b8f4637c7116610986f31d967438 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 29 Jan 2022 12:32:25 +0700 Subject: [PATCH 1/8] PyThaiNLP v3.0.0 --- README.md | 10 +++++++--- README_TH.md | 4 ++-- SECURITY.md | 5 +++-- docs/notes/installation.rst | 4 ++++ pythainlp/__init__.py | 2 +- pythainlp/util/thai.py | 8 ++++---- setup.cfg | 2 +- setup.py | 2 +- 8 files changed, 23 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 8f88eb61d..dd42f88c9 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,12 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหร **News** ->Since PyThaiNLP 3.0, We will end support PyThaiNLP on Python 3.6. Python 3.6 users can use PyThaiNLP 2.3.1 +>Since PyThaiNLP 3.0, We will end support PyThaiNLP on Python 3.6. Python 3.6 users can use PyThaiNLP 2.3.2 | Version | Description | Status | |:------:|:--:|:------:| -| [2.3.2](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/445) | -| [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 3.0 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/545) | +| [3.0](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/545) | +| [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 3.1 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/545) | ## Getting Started @@ -134,6 +134,10 @@ thainlp help - Please do fork and create a pull request :) - For style guide and other information, including references to algorithms we use, please refer to our [contributing](https://github.com/PyThaiNLP/pythainlp/blob/dev/CONTRIBUTING.md) page. +## Who uses PyThaiNLP? + +You can read [INTHEWILD.md](https://github.com/PyThaiNLP/pythainlp/blob/dev/INTHEWILD.md). 
+ ## Citations diff --git a/README_TH.md b/README_TH.md index 66a5e9c82..76f4e6c92 100644 --- a/README_TH.md +++ b/README_TH.md @@ -20,8 +20,8 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหร | รุ่น | คำอธิบาย | สถานะ | |:------:|:--:|:------:| -| [2.3.2](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/445) | -| [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 3.0 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/545) | +| [3.0](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/545) | +| [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 3.1 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/545) | ติดตามพวกเราบน [PyThaiNLP Facebook page](https://www.facebook.com/pythainlp/) เพื่อรับข่าวสารเพิ่มเติม diff --git a/SECURITY.md b/SECURITY.md index 3a620ad0f..68c92ddb7 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -4,8 +4,9 @@ | Version | Supported | | ------- | ------------------ | +| 3.0.x | :white_check_mark: | | 2.3.x | :white_check_mark: | -| 2.2.x | :white_check_mark: | -| 2.1.x | :white_check_mark: | +| 2.2.x | :x: | +| 2.1.x | :x: | | 2.0.x | :x: | | < 2.0 | :x: | diff --git a/docs/notes/installation.rst b/docs/notes/installation.rst index 84b52c8a4..2c5024616 100644 --- a/docs/notes/installation.rst +++ b/docs/notes/installation.rst @@ -27,7 +27,11 @@ where ``extras`` can be - ``mt5`` (to mt5 models for Thai text summarizer) - ``wordnet`` (to support wordnet) - ``spell`` (to support phunspell & symspellpy) + - ``generate`` (to support text generation with ULMFiT or thai2fit) - ``tltk`` (to support tltk) + - ``textaugment`` (to support text augmentation) + - ``oskut`` (to support OSKUT) + - ``nlpo3`` (to support nlpo3 engine) - ``full`` (install everything) For dependency details, look at `extras` variable in `setup.py `_. 
diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py index 615b4e97a..dfb9bd5cc 100644 --- a/pythainlp/__init__.py +++ b/pythainlp/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -__version__ = "3.0.0-dev0" +__version__ = "3.0.0" thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars diff --git a/pythainlp/util/thai.py b/pythainlp/util/thai.py index 6db52fc83..d125e0af4 100644 --- a/pythainlp/util/thai.py +++ b/pythainlp/util/thai.py @@ -90,16 +90,16 @@ def countthai(text: str, ignore_chars: str = _DEFAULT_IGNORE_CHARS) -> float: from pythainlp.util import countthai - countthai("ไทยเอ็นแอลพี 2.3") + countthai("ไทยเอ็นแอลพี 3.0") # output: 100.0 - countthai("PyThaiNLP 2.3") + countthai("PyThaiNLP 3.0") # output: 0.0 - countthai("ใช้งาน PyThaiNLP 2.3") + countthai("ใช้งาน PyThaiNLP 3.0") # output: 40.0 - countthai("ใช้งาน PyThaiNLP 2.3", ignore_chars="") + countthai("ใช้งาน PyThaiNLP 3.0", ignore_chars="") # output: 30.0 """ if not text or not isinstance(text, str): diff --git a/setup.cfg b/setup.cfg index 5dd1f7025..d3bd97796 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 3.0.0-beta0 +current_version = 3.0.0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
diff --git a/setup.py b/setup.py index 864ea38fb..f4ac5d77a 100644 --- a/setup.py +++ b/setup.py @@ -105,7 +105,7 @@ setup( name="pythainlp", - version="3.0.0-beta0", + version="3.0.0", description="Thai Natural Language Processing library", long_description=readme, long_description_content_type="text/markdown", From 2c2da2de95e0bf0b2f48ba2819e77832fb78b1ed Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 29 Jan 2022 12:35:06 +0700 Subject: [PATCH 2/8] Fixed W291 in pythainlp/util/thai.py: --- pythainlp/util/thai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pythainlp/util/thai.py b/pythainlp/util/thai.py index d125e0af4..6c9fbac4a 100644 --- a/pythainlp/util/thai.py +++ b/pythainlp/util/thai.py @@ -90,13 +90,13 @@ def countthai(text: str, ignore_chars: str = _DEFAULT_IGNORE_CHARS) -> float: from pythainlp.util import countthai - countthai("ไทยเอ็นแอลพี 3.0") + countthai("ไทยเอ็นแอลพี 3.0") # output: 100.0 - countthai("PyThaiNLP 3.0") + countthai("PyThaiNLP 3.0") # output: 0.0 - countthai("ใช้งาน PyThaiNLP 3.0") + countthai("ใช้งาน PyThaiNLP 3.0") # output: 40.0 countthai("ใช้งาน PyThaiNLP 3.0", ignore_chars="") From 389819149313714da1e9d8357607f33fc802e84e Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 29 Jan 2022 12:38:14 +0700 Subject: [PATCH 3/8] Fixed URL --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f4ac5d77a..47adb15e5 100644 --- a/setup.py +++ b/setup.py @@ -154,7 +154,7 @@ ], }, project_urls={ - "Documentation": "https://pythainlp.github.io/docs/2.3/", + "Documentation": "https://pythainlp.github.io/docs/3.0/", "Tutorials": "https://pythainlp.github.io/tutorials/", "Source Code": "https://github.com/PyThaiNLP/pythainlp", "Bug Tracker": "https://github.com/PyThaiNLP/pythainlp/issues", From ee32b2e48acd47f1f1ff96482abf20f3d1818fc4 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 29 Jan 2022 19:43:49 +0700 
Subject: [PATCH 4/8] Add load wordnet to tests --- tests/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/__init__.py b/tests/__init__.py index 3b60f0c32..73dc14d18 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -6,9 +6,12 @@ """ import sys import unittest +import nltk sys.path.append("../pythainlp") +nltk.download('omw-1.4') # load wordnet + loader = unittest.TestLoader() testSuite = loader.discover("tests") testRunner = unittest.TextTestRunner(verbosity=1) From c40228ba23c4bbb94b8a43e0d12828bdb5c37562 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 29 Jan 2022 21:11:56 +0700 Subject: [PATCH 5/8] Add load wordnet --- tests/__init__.py | 3 --- tests/test_augment.py | 2 ++ tests/test_corpus.py | 2 ++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 73dc14d18..3b60f0c32 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -6,12 +6,9 @@ """ import sys import unittest -import nltk sys.path.append("../pythainlp") -nltk.download('omw-1.4') # load wordnet - loader = unittest.TestLoader() testSuite = loader.discover("tests") testRunner = unittest.TextTestRunner(verbosity=1) diff --git a/tests/test_augment.py b/tests/test_augment.py index 4048f6d46..9cfb37abc 100644 --- a/tests/test_augment.py +++ b/tests/test_augment.py @@ -9,6 +9,7 @@ Thai2fitAug, LTW2VAug ) +import nltk class TestTextaugmentPackage(unittest.TestCase): @@ -17,6 +18,7 @@ def setUp(self): self.text2 = "เราอยู่ที่มหาวิทยาลัยขอนแก่น" def test_WordNetAug(self): + nltk.download('omw-1.4') # load wordnet wordnetaug = WordNetAug() self.assertIsNotNone(wordnetaug.augment(self.text)) self.assertIsNotNone(wordnetaug.find_synonyms("ผม", pos=None)) diff --git a/tests/test_corpus.py b/tests/test_corpus.py index 792f70fd3..5b3062872 100644 --- a/tests/test_corpus.py +++ b/tests/test_corpus.py @@ -27,6 +27,7 @@ ) from pythainlp.corpus.util import revise_newmm_default_wordset from requests import Response 
+import nltk class TestCorpusPackage(unittest.TestCase): @@ -120,6 +121,7 @@ def test_ttc(self): self.assertIsNotNone(ttc.unigram_word_freqs()) def test_wordnet(self): + nltk.download('omw-1.4') # load wordnet self.assertIsInstance(wordnet.langs(), list) self.assertIn("tha", wordnet.langs()) From 626ef59a6fb3dd926dea6b8fb514c8f3815b0bfc Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 29 Jan 2022 21:47:16 +0700 Subject: [PATCH 6/8] Update nltk download --- tests/test_augment.py | 2 +- tests/test_corpus.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_augment.py b/tests/test_augment.py index 9cfb37abc..253ae14d9 100644 --- a/tests/test_augment.py +++ b/tests/test_augment.py @@ -18,7 +18,7 @@ def setUp(self): self.text2 = "เราอยู่ที่มหาวิทยาลัยขอนแก่น" def test_WordNetAug(self): - nltk.download('omw-1.4') # load wordnet + nltk.download('omw-1.4', force=True) # load wordnet wordnetaug = WordNetAug() self.assertIsNotNone(wordnetaug.augment(self.text)) self.assertIsNotNone(wordnetaug.find_synonyms("ผม", pos=None)) diff --git a/tests/test_corpus.py b/tests/test_corpus.py index 5b3062872..2152b1bcf 100644 --- a/tests/test_corpus.py +++ b/tests/test_corpus.py @@ -121,7 +121,7 @@ def test_ttc(self): self.assertIsNotNone(ttc.unigram_word_freqs()) def test_wordnet(self): - nltk.download('omw-1.4') # load wordnet + nltk.download('omw-1.4', force=True) # load wordnet self.assertIsInstance(wordnet.langs(), list) self.assertIn("tha", wordnet.langs()) From 02a1063370d3ccfa1983503b7189437d14ca5268 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 29 Jan 2022 23:02:16 +0700 Subject: [PATCH 7/8] Add nltk.downloader omw-1.4 to workflows --- .github/workflows/macos-test.yml | 1 + .github/workflows/test.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/macos-test.yml b/.github/workflows/macos-test.yml index 46c32ddf2..79df394ba 100644 --- a/.github/workflows/macos-test.yml +++ 
b/.github/workflows/macos-test.yml @@ -47,6 +47,7 @@ jobs: if [ -f docker_requirements.txt ]; then pip install -r docker_requirements.txt; fi pip install deepcut pip install .[full] + python -m nltk.downloader omw-1.4 - name: Test shell: bash -l {0} env: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b42e21a35..0a60973e6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,6 +33,7 @@ jobs: if [ -f docker_requirements.txt ]; then pip install -r docker_requirements.txt; fi pip install deepcut pip install .[full] + python -m nltk.downloader omw-1.4 - name: Test env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From df480431db315c9af2a496aa288a6b0734c8829b Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Sat, 29 Jan 2022 23:27:50 +0700 Subject: [PATCH 8/8] Update test_corpus.py --- tests/test_corpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_corpus.py b/tests/test_corpus.py index 2152b1bcf..9e23eb5f0 100644 --- a/tests/test_corpus.py +++ b/tests/test_corpus.py @@ -122,7 +122,7 @@ def test_ttc(self): def test_wordnet(self): nltk.download('omw-1.4', force=True) # load wordnet - self.assertIsInstance(wordnet.langs(), list) + self.assertIsNotNone(wordnet.langs()) self.assertIn("tha", wordnet.langs()) self.assertEqual(