From 35f13e1c0a6eea411363de8d93002ee30694b894 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Tue, 28 Dec 2021 12:49:00 +0700 Subject: [PATCH 1/2] Add Thai-French Machine Translation --- docs/api/translate.rst | 15 ++++---- pythainlp/translate/__init__.py | 8 ----- pythainlp/translate/core.py | 5 +++ pythainlp/translate/th_fr.py | 61 +++++++++++++++++++++++++++++++++ tests/test_translate.py | 14 ++++++-- 5 files changed, 83 insertions(+), 20 deletions(-) create mode 100644 pythainlp/translate/th_fr.py diff --git a/docs/api/translate.rst b/docs/api/translate.rst index d3565e09b..4662fea59 100644 --- a/docs/api/translate.rst +++ b/docs/api/translate.rst @@ -9,12 +9,9 @@ Modules .. autoclass:: Translate :members: -.. autofunction:: download_model_all -.. autoclass:: EnThTranslator - :members: translate -.. autoclass:: ThEnTranslator - :members: translate -.. autoclass:: ThZhTranslator - :members: translate -.. autoclass:: ZhThTranslator - :members: translate +.. autofunction:: pythainlp.translate.en_th.download_model_all +.. autoclass:: pythainlp.translate.en_th.EnThTranslator +.. autoclass:: pythainlp.translate.en_th.ThEnTranslator +.. autoclass:: pythainlp.translate.zh_th.ThZhTranslator +.. autoclass:: pythainlp.translate.zh_th.ZhThTranslator +.. autoclass:: pythainlp.translate.th_fr.ThFrTranslator diff --git a/pythainlp/translate/__init__.py b/pythainlp/translate/__init__.py index 93a473277..aab2fb485 100644 --- a/pythainlp/translate/__init__.py +++ b/pythainlp/translate/__init__.py @@ -4,9 +4,6 @@ """ __all__ = [ - "EnThTranslator", - "ThEnTranslator", - "download_model_all", "ThZhTranslator", "ZhThTranslator", "Translate" @@ -14,11 +11,6 @@ from pythainlp.translate.core import Translate -from pythainlp.translate.en_th import ( - EnThTranslator, - ThEnTranslator, - download_model_all, -) from pythainlp.translate.zh_th import ( ThZhTranslator, ZhThTranslator, diff --git a/pythainlp/translate/core.py b/pythainlp/translate/core.py index 48702ee43..6c4d178a1 100644 --- a/pythainlp/translate/core.py +++ b/pythainlp/translate/core.py @@ -13,6 +13,7 @@ class Translate: * *en* - *th* - English to Thai * *th* - *zh* - Thai to Chinese * *zh* - *th* - Chinese to Thai + * *th* - *fr* - Thai to French :Example: @@ -34,6 +35,7 @@ def __init__(self, src_lang: str, target_lang: str) -> None: * *en* - *th* - English to Thai * *th* - *zh* - Thai to Chinese * *zh* - *th* - Chinese to Thai + * *th* - *fr* - Thai to French :Example: @@ -61,6 +63,9 @@ def load_model(self, src_lang: str, target_lang: str): elif src_lang == "zh" and target_lang == "th": from pythainlp.translate.zh_th import ZhThTranslator self.model = ZhThTranslator() + elif src_lang == "th" and target_lang == "fr": + from pythainlp.translate.th_fr import ThFrTranslator + self.model = ThFrTranslator() else: raise ValueError("Not support language!") diff --git a/pythainlp/translate/th_fr.py b/pythainlp/translate/th_fr.py new file mode 100644 index 000000000..311e61b08 --- /dev/null +++ b/pythainlp/translate/th_fr.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +""" +Thai-French Machine Translation + +Trained by OPUS Corpus + +Model from Language Technology Research Group at the University of Helsinki + +BLEU 20.4 + +- GitHub: https://github.com/Helsinki-NLP/OPUS-MT-train/tree/master/models/th-fr +- Huggingface https://huggingface.co/Helsinki-NLP/opus-mt-th-fr +""" +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM + + +class ThFrTranslator: + """ + Thai-French Machine Translation + + Trained by OPUS Corpus + + Model from Language Technology Research Group at the University of Helsinki + + BLEU 20.4 + + - GitHub: https://github.com/Helsinki-NLP/OPUS-MT-train/tree/master/models/th-fr + - Huggingface https://huggingface.co/Helsinki-NLP/opus-mt-th-fr + """ + def __init__(self, pretrained: str = "Helsinki-NLP/opus-mt-th-fr") -> None: + self.tokenizer_thzh = AutoTokenizer.from_pretrained(pretrained) + self.model_thzh = AutoModelForSeq2SeqLM.from_pretrained(pretrained) + + def translate(self, text: str) -> str: + """ + Translate text from Thai to French + + :param str text: input text in source language + :return: translated text in target language + :rtype: str + + :Example: + + Translate text from Thai to French:: + + from pythainlp.translate.th_fr import ThFrTranslator + + thfr = ThFrTranslator() + + thfr.translate("ทดสอบระบบ") + # output: "Test du système." + + """ + self.translated = self.model_thzh.generate( + **self.tokenizer_thzh(text, return_tensors="pt", padding=True) + ) + return [ + self.tokenizer_thzh.decode( + t, skip_special_tokens=True + ) for t in self.translated + ][0] diff --git a/tests/test_translate.py b/tests/test_translate.py index c3040d369..fafbd12f8 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -3,13 +3,15 @@ import unittest from pythainlp.translate import ( - EnThTranslator, - ThEnTranslator, ThZhTranslator, ZhThTranslator, - download_model_all, Translate ) +from pythainlp.translate.en_th import ( + EnThTranslator, + ThEnTranslator, + download_model_all +) from pythainlp.corpus import remove @@ -65,5 +67,11 @@ def test_translate(self): "我爱你", ) ) + self.th_fr_translator = Translate('th', 'fr') + self.assertIsNotNone( + self.th_fr_translator.translate( + "ทดสอบระบบ", + ) + ) with self.assertRaises(ValueError): self.th_cat_translator = Translate('th', 'cat') From dfe3469c64ff3ef6d52fdafdf47fd9f31995001a Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Tue, 28 Dec 2021 12:50:51 +0700 Subject: [PATCH 2/2] Update th_fr.py --- pythainlp/translate/th_fr.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pythainlp/translate/th_fr.py b/pythainlp/translate/th_fr.py index 311e61b08..9a8a83797 100644 --- a/pythainlp/translate/th_fr.py +++ b/pythainlp/translate/th_fr.py @@ -8,7 +8,6 @@ BLEU 20.4 -- GitHub: https://github.com/Helsinki-NLP/OPUS-MT-train/tree/master/models/th-fr - Huggingface https://huggingface.co/Helsinki-NLP/opus-mt-th-fr """ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM @@ -24,7 +23,6 @@ class ThFrTranslator: BLEU 20.4 - - GitHub: https://github.com/Helsinki-NLP/OPUS-MT-train/tree/master/models/th-fr - Huggingface https://huggingface.co/Helsinki-NLP/opus-mt-th-fr """ def __init__(self, pretrained: str = "Helsinki-NLP/opus-mt-th-fr") -> None: