Skip to content

Commit 34ab090

Browse files
authored
Merge pull request #635 from PyThaiNLP/Add-th-fr-mt
Add Thai-French Machine Translation
2 parents e418199 + dfe3469 commit 34ab090

File tree

5 files changed

+81
-20
lines changed

5 files changed

+81
-20
lines changed

docs/api/translate.rst

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,9 @@ Modules
99

1010
.. autoclass:: Translate
1111
:members:
12-
.. autofunction:: download_model_all
13-
.. autoclass:: EnThTranslator
14-
:members: translate
15-
.. autoclass:: ThEnTranslator
16-
:members: translate
17-
.. autoclass:: ThZhTranslator
18-
:members: translate
19-
.. autoclass:: ZhThTranslator
20-
:members: translate
12+
.. autofunction:: pythainlp.translate.en_th.download_model_all
13+
.. autoclass:: pythainlp.translate.en_th.EnThTranslator
14+
.. autoclass:: pythainlp.translate.en_th.ThEnTranslator
15+
.. autoclass:: pythainlp.translate.zh_th.ThZhTranslator
16+
.. autoclass:: pythainlp.translate.zh_th.ZhThTranslator
17+
.. autoclass:: pythainlp.translate.th_fr.ThFrTranslator

pythainlp/translate/__init__.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,13 @@
44
"""
55

66
__all__ = [
7-
"EnThTranslator",
8-
"ThEnTranslator",
9-
"download_model_all",
107
"ThZhTranslator",
118
"ZhThTranslator",
129
"Translate"
1310
]
1411

1512
from pythainlp.translate.core import Translate
1613

17-
from pythainlp.translate.en_th import (
18-
EnThTranslator,
19-
ThEnTranslator,
20-
download_model_all,
21-
)
2214
from pythainlp.translate.zh_th import (
2315
ThZhTranslator,
2416
ZhThTranslator,

pythainlp/translate/core.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class Translate:
1313
* *en* - *th* - English to Thai
1414
* *th* - *zh* - Thai to Chinese
1515
* *zh* - *th* - Chinese to Thai
16+
* *th* - *fr* - Thai to French
1617
1718
:Example:
1819
@@ -34,6 +35,7 @@ def __init__(self, src_lang: str, target_lang: str) -> None:
3435
* *en* - *th* - English to Thai
3536
* *th* - *zh* - Thai to Chinese
3637
* *zh* - *th* - Chinese to Thai
38+
* *th* - *fr* - Thai to French
3739
3840
:Example:
3941
@@ -61,6 +63,9 @@ def load_model(self, src_lang: str, target_lang: str):
6163
elif src_lang == "zh" and target_lang == "th":
6264
from pythainlp.translate.zh_th import ZhThTranslator
6365
self.model = ZhThTranslator()
66+
elif src_lang == "th" and target_lang == "fr":
67+
from pythainlp.translate.th_fr import ThFrTranslator
68+
self.model = ThFrTranslator()
6469
else:
6570
raise ValueError("Not support language!")
6671

pythainlp/translate/th_fr.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Thai-French Machine Translation
4+
5+
Trained on the OPUS corpus
6+
7+
Model from Language Technology Research Group at the University of Helsinki
8+
9+
BLEU 20.4
10+
11+
- Huggingface https://huggingface.co/Helsinki-NLP/opus-mt-th-fr
12+
"""
13+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
14+
15+
16+
class ThFrTranslator:
    """
    Thai-French Machine Translation

    Trained on the OPUS corpus

    Model from Language Technology Research Group at the University of Helsinki

    BLEU 20.4

    - Huggingface https://huggingface.co/Helsinki-NLP/opus-mt-th-fr

    :param str pretrained: model name or path handed to ``from_pretrained``
        for both the tokenizer and the seq2seq model
    """

    def __init__(self, pretrained: str = "Helsinki-NLP/opus-mt-th-fr") -> None:
        # NOTE(review): the ``_thzh`` suffix does not match this Thai-French
        # class (presumably carried over from the Thai-Chinese translator).
        # The names are kept so code that reads these attributes still works.
        self.tokenizer_thzh = AutoTokenizer.from_pretrained(pretrained)
        self.model_thzh = AutoModelForSeq2SeqLM.from_pretrained(pretrained)

    def translate(self, text: str) -> str:
        """
        Translate text from Thai to French

        :param str text: input text in source language
        :return: translated text in target language; an empty string when
            *text* is empty or contains only whitespace
        :rtype: str

        :Example:

        Translate text from Thai to French::

            from pythainlp.translate.th_fr import ThFrTranslator

            thfr = ThFrTranslator()

            thfr.translate("ทดสอบระบบ")
            # output: "Test du système."

        """
        # Nothing to translate: skip the model call instead of letting the
        # model generate output for an input that carries no content.
        if isinstance(text, str) and not text.strip():
            return ""

        encoded = self.tokenizer_thzh(text, return_tensors="pt", padding=True)
        # The generated ids stay on the instance because earlier versions
        # exposed them as ``self.translated``.
        self.translated = self.model_thzh.generate(**encoded)
        # Only the first generated sequence is returned, so decode only it.
        return self.tokenizer_thzh.decode(
            self.translated[0], skip_special_tokens=True
        )

tests/test_translate.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
import unittest
44

55
from pythainlp.translate import (
6-
EnThTranslator,
7-
ThEnTranslator,
86
ThZhTranslator,
97
ZhThTranslator,
10-
download_model_all,
118
Translate
129
)
10+
from pythainlp.translate.en_th import (
11+
EnThTranslator,
12+
ThEnTranslator,
13+
download_model_all
14+
)
1315
from pythainlp.corpus import remove
1416

1517

@@ -65,5 +67,11 @@ def test_translate(self):
6567
"我爱你",
6668
)
6769
)
70+
self.th_fr_translator = Translate('th', 'fr')
71+
self.assertIsNotNone(
72+
self.th_fr_translator.translate(
73+
"ทดสอบระบบ",
74+
)
75+
)
6876
with self.assertRaises(ValueError):
6977
self.th_cat_translator = Translate('th', 'cat')

0 commit comments

Comments
 (0)