File tree Expand file tree Collapse file tree 4 files changed +36
-0
lines changed Expand file tree Collapse file tree 4 files changed +36
-0
lines changed Original file line number Diff line number Diff line change @@ -49,6 +49,7 @@ Modules
4949.. autofunction :: thaiword_to_num
5050.. autofunction :: thaiword_to_time
5151.. autofunction :: time_to_thaiword
52+ .. autofunction :: tis620_to_utf8
5253.. autofunction :: tone_detector
5354.. autofunction :: words_to_num
5455.. autofunction :: nectec_to_ipa
Original file line number Diff line number Diff line change 6565 "nectec_to_ipa" ,
6666 "ipa_to_rtgs" ,
6767 "remove_tone_ipa" ,
68+ "tis620_to_utf8" ,
6869]
6970
7071from pythainlp .util .collate import collate
121122 syllable_open_close_detector ,
122123)
123124from pythainlp .util .phoneme import nectec_to_ipa , ipa_to_rtgs , remove_tone_ipa
125+ from pythainlp .util .encoding import tis620_to_utf8
Original file line number Diff line number Diff line change 1+ # -*- coding: utf-8 -*-
2+ # Copyright (C) 2016-2023 PyThaiNLP Project
3+ #
4+ # Licensed under the Apache License, Version 2.0 (the "License");
5+ # you may not use this file except in compliance with the License.
6+ # You may obtain a copy of the License at
7+ #
8+ # http://www.apache.org/licenses/LICENSE-2.0
9+ #
10+ # Unless required by applicable law or agreed to in writing, software
11+ # distributed under the License is distributed on an "AS IS" BASIS,
12+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ # See the License for the specific language governing permissions and
14+ # limitations under the License.
def tis620_to_utf8(text: str) -> str:
    """
    Convert TIS-620 to UTF-8

    Repairs Thai text whose TIS-620 bytes were mistakenly decoded as
    Windows-1252 (cp1252): the text is encoded back to bytes with cp1252
    and those bytes are then decoded as TIS-620. The result is a regular
    Python ``str``.

    :param str text: Text that use TIS-620 encoding
                     (TIS-620 bytes that were read as cp1252)
    :return: Text that use UTF-8 encoding
    :rtype: str
    :raises UnicodeEncodeError: if *text* contains a character that
                                cp1252 cannot encode (for example text
                                that is already proper Thai)
    :raises UnicodeDecodeError: if the resulting bytes cannot be decoded
                                as TIS-620

    :Example:
    ::

        from pythainlp.util import tis620_to_utf8

        tis620_to_utf8("¡ÃÐ·ÃÇ§ÍØµÊÒË¡ÃÃÁ")
        # output: 'กระทรวงอุตสาหกรรม'
    """
    # Both steps use the default strict error handling, so malformed
    # input raises instead of silently producing wrong characters.
    return text.encode("cp1252").decode("tis-620")
Original file line number Diff line number Diff line change 5757 nectec_to_ipa ,
5858 ipa_to_rtgs ,
5959 remove_tone_ipa ,
60+ tis620_to_utf8 ,
6061)
6162
6263
@@ -840,3 +841,6 @@ def test_ipa_to_rtgs(self):
840841
841842 def test_remove_tone_ipa (self ):
842843 self .assertEqual (remove_tone_ipa ("laː˦˥.sa˨˩.maj˩˩˦" ), "laː.sa.maj" )
844+
def test_tis620_to_utf8(self):
    # The input is Thai TIS-620 text that was mis-decoded as cp1252;
    # "Ð" and "·" are two separate characters (bytes 0xD0 and 0xB7).
    self.assertEqual(
        tis620_to_utf8("¡ÃÐ·ÃÇ§ÍØµÊÒË¡ÃÃÁ"),
        "กระทรวงอุตสาหกรรม",
    )
You can’t perform that action at this time.
0 commit comments