File tree Expand file tree Collapse file tree 2 files changed +18
-11
lines changed Expand file tree Collapse file tree 2 files changed +18
-11
lines changed Original file line number Diff line number Diff line change 106106}
107107
108108_punctuation_and_digits = {
109+ # ฯ can have two meanings in ISO 11940.
110+ # If it is for abbreviation, it is paiyan noi.
111+ # If it is for sentence termination, it is angkhan diao.
112+ # Without semantic analysis, they cannot be distinguished from each other.
113+ # In this simple implementation, we decided to always treat ฯ as paiyan noi.
114+ # We commented out the angkhan diao line to remove it from the dictionary
115+ # and avoid having duplicate keys.
109116 "ๆ" : "«" ,
110- "ฯ" : "ǂ" ,
117+ "ฯ" : "ǂ" , # paiyan noi: U+01C2 ǂ Alveolar Click; ICU uses ‡ (double dagger)
111118 "๏" : "§" ,
112- "ฯ" : "ǀ" ,
113- "๚" : "ǁ" ,
119+ # "ฯ": "ǀ", # angkhan diao: U+01C0 ǀ Dental Click; ICU uses | (vertical bar)
120+ "๚" : "ǁ" , # angkhan khu: U+01C1 ǁ Lateral Click; ICU uses || (two vertical bars)
114121 "๛" : "»" ,
115122 "๐" : "0" ,
116123 "๑" : "1" ,
130137 ** _tone_marks ,
131138 ** _punctuation_and_digits ,
132139}
133- _list_k = _all_dict .keys ()
140+ _keys_set = _all_dict .keys ()
134141
135142
136143def transliterate (word : str ) -> str :
137144 """
138145 Use ISO 11940 for transliteration
139146 :param str word: Thai text to be transliterated.
140- :return: A string of IPA indicating how the text should be pronounced.
147+ :return: A string indicating how the text should be pronounced, according to ISO 11940 .
141148 """
142- _new = ""
149+ _str = ""
143150 for i in word :
144- if i in _list_k :
145- _new += _all_dict [i ]
151+ if i in _keys_set :
152+ _str += _all_dict [i ]
146153 else :
147- _new += i
148- return _new
154+ _str += i
155+ return _str
Original file line number Diff line number Diff line change @@ -26,7 +26,7 @@ search = __version__ = "{current_version}"
2626replace = __version__ = " {new_version}"
2727
2828[metadata]
29- description-file = README.md
29+ description_file = README.md
3030
3131[coverage:run]
3232source = pythainlp
You can’t perform that action at this time.
0 commit comments