Skip to content

Commit f3b2fbf

Browse files
authored
Merge pull request #521 from PyThaiNLP/revert-513-add-thai-keyboard-distance
Revert "Add thai keyboard distance"
2 parents 3103449 + 0dd7406 commit f3b2fbf

File tree

3 files changed

+10
-107
lines changed

3 files changed

+10
-107
lines changed

pythainlp/util/__init__.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,6 @@
3333
"text_to_arabic_digit",
3434
"text_to_thai_digit",
3535
"thai_digit_to_arabic_digit",
36-
"thai_keyboard_dist",
3736
"thai_strftime",
3837
"thai_time",
3938
"thai_to_eng",
@@ -56,11 +55,6 @@
5655
text_to_thai_digit,
5756
thai_digit_to_arabic_digit,
5857
)
59-
from pythainlp.util.keyboard import (
60-
eng_to_thai,
61-
thai_keyboard_dist,
62-
thai_to_eng,
63-
)
6458
from pythainlp.util.emojiconv import emoji_to_thai
6559
from pythainlp.util.keyboard import eng_to_thai, thai_to_eng
6660
from pythainlp.util.keywords import find_keyword, rank

pythainlp/util/keyboard.py

Lines changed: 2 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
# -*- coding: utf-8 -*-
22
"""
3-
Functions related to keyboard layout.
3+
Correct text in one language that is incorrectly-typed
4+
with a keyboard layout in another language.
45
"""
5-
66
EN_TH_KEYB_PAIRS = {
77
"Z": "(",
88
"z": "ผ",
@@ -103,19 +103,6 @@
103103
EN_TH_TRANSLATE_TABLE = str.maketrans(EN_TH_KEYB_PAIRS)
104104
TH_EN_TRANSLATE_TABLE = str.maketrans(TH_EN_KEYB_PAIRS)
105105

106-
TIS_820_2531_MOD = [
107-
["-", "ๅ", "/", "", "_", "ภ", "ถ", "ุ", "ึ", "ค", "ต", "จ", "ข", "ช"],
108-
["ๆ", "ไ", "ำ", "พ", "ะ", "ั", "ี", "ร", "น", "ย", "บ", "ล", "ฃ"],
109-
["ฟ", "ห", "ก", "ด", "เ", "้", "่", "า", "ส", "ว", "ง"],
110-
["ผ", "ป", "แ", "อ", "ิ", "ื", "ท", "ม", "ใ", "ฝ"],
111-
]
112-
TIS_820_2531_MOD_SHIFT = [
113-
["%", "+", "๑", "๒", "๓", "๔", "ู", "฿", "๕", "๖", "๗", "๘", "๙"],
114-
["๐", "\"", "ฎ", "ฑ", "ธ", "ํ", "๊", "ณ", "ฯ", "ญ", "ฐ", ",", "ฅ"],
115-
["ฤ", "ฆ", "ฏ", "โ", "ฌ", "็", "๋", "ษ", "ศ", "ซ", "."],
116-
["(", ")", "ฉ", "ฮ", "ฺ", "์", "?", "ฒ", "ฬ", "ฦ"],
117-
]
118-
119106

120107
def eng_to_thai(text: str) -> str:
121108
"""
@@ -161,63 +148,3 @@ def thai_to_eng(text: str) -> str:
161148
# output: 'Bank of Thailand'
162149
"""
163150
return text.translate(TH_EN_TRANSLATE_TABLE)
164-
165-
166-
def thai_keyboard_dist(c1: str, c2: str, shift_dist: float = 0.0) -> float:
167-
"""
168-
Calculate euclidean distance between two Thai characters
169-
according to their location on a Thai keyboard layout.
170-
171-
A modified TIS 820-2531 standard keyboard layout, which is developed
172-
from Kedmanee layout and is the most commonly used Thai keyboard layout,
173-
is used in distance calculation.
174-
175-
The modified TIS 820-2531 is TIS 820-2531 with few key extensions
176-
proposed in TIS 820-2536 draft. See Figure 4, notice grey keys, in
177-
https://www.nectec.or.th/it-standards/keyboard_layout/thai-key.html
178-
179-
Noted that the latest TIS 820-2538 has slight changes in layout from
180-
TIS 820-2531. See Figure 2, notice the Thai Baht sign and ฅ-ฃ pair, in
181-
https://www.nectec.or.th/it-standards/std820/std820.html
182-
Since TIS 820-2538 is not widely adopted by keyboard manufacturer,
183-
this function uses the de facto standard modified TIS 820-2531 instead.
184-
185-
:param str c1: first character
186-
:param str c2: second character
187-
:param str shift_dist: return value if they're shifted
188-
:return: euclidean distance between two characters
189-
:rtype: float
190-
191-
:Example:
192-
193-
from pythainlp.util import thai_keyboard_dist
194-
thai_keyboard_dist("ด", "ะ")
195-
# output: 1.4142135623730951
196-
thai_keyboard_dist("ฟ", "ฤ")
197-
# output: 0.0
198-
thai_keyboard_dist("ฟ", "ห")
199-
# output: 1.0
200-
thai_keyboard_dist("ฟ", "ก")
201-
# output: 2.0
202-
thai_keyboard_dist("ฟ", "ฤ", 0.5)
203-
# output: 0.5
204-
"""
205-
def get_char_coord(
206-
ch: str, layouts=[TIS_820_2531_MOD, TIS_820_2531_MOD_SHIFT]
207-
):
208-
for layout in layouts:
209-
for row in layout:
210-
if ch in row:
211-
r = layout.index(row)
212-
c = row.index(ch)
213-
return (r, c)
214-
raise ValueError(ch + " not found in given keyboard layout")
215-
216-
coord1 = get_char_coord(c1)
217-
coord2 = get_char_coord(c2)
218-
distance = (
219-
(coord1[0] - coord2[0]) ** 2 + (coord1[1] - coord2[1]) ** 2
220-
) ** (0.5)
221-
if distance == 0 and c1 != c2:
222-
return shift_dist
223-
return distance

tests/test_util.py

Lines changed: 8 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,6 @@
4444
time_to_thaiword,
4545
thai_to_eng,
4646
thaiword_to_num,
47-
thai_keyboard_dist,
4847
)
4948

5049

@@ -158,17 +157,6 @@ def test_rank(self):
158157
rank(["แมว", "คน", "แมว"], exclude_stopwords=True)
159158
)
160159

161-
# ### pythainlp.util.keyboard
162-
163-
def test_thai_keyboard_dist(self):
164-
self.assertEqual(thai_keyboard_dist("ฟ", "ฤ"), 0.0)
165-
self.assertEqual(thai_keyboard_dist("ฟ", "ห"), 1.0)
166-
self.assertEqual(thai_keyboard_dist("ฟ", "ก"), 2.0)
167-
self.assertEqual(thai_keyboard_dist("ฟ", "ฤ", 0.5), 0.5)
168-
self.assertNotEqual(
169-
thai_keyboard_dist("๘", "๙"), thai_keyboard_dist("๙", "๐")
170-
)
171-
172160
# ### pythainlp.util.date
173161

174162
def test_date(self):
@@ -250,8 +238,7 @@ def test_time_to_thaiword(self):
250238
time_to_thaiword(time(12, 3, 0)), "สิบสองนาฬิกาสามนาที"
251239
)
252240
self.assertEqual(
253-
time_to_thaiword(time(12, 3, 1)),
254-
"สิบสองนาฬิกาสามนาทีหนึ่งวินาที",
241+
time_to_thaiword(time(12, 3, 1)), "สิบสองนาฬิกาสามนาทีหนึ่งวินาที",
255242
)
256243
self.assertEqual(
257244
time_to_thaiword(datetime(2014, 5, 22, 12, 3, 0), precision="s"),
@@ -366,16 +353,13 @@ def test_thaiword_to_date(self):
366353
now + timedelta(days=0), thaiword_to_date("วันนี้", now)
367354
)
368355
self.assertEqual(
369-
now + timedelta(days=1),
370-
thaiword_to_date("พรุ่งนี้", now),
356+
now + timedelta(days=1), thaiword_to_date("พรุ่งนี้", now),
371357
)
372358
self.assertEqual(
373-
now + timedelta(days=2),
374-
thaiword_to_date("มะรืนนี้", now),
359+
now + timedelta(days=2), thaiword_to_date("มะรืนนี้", now),
375360
)
376361
self.assertEqual(
377-
now + timedelta(days=-1),
378-
thaiword_to_date("เมื่อวาน", now),
362+
now + timedelta(days=-1), thaiword_to_date("เมื่อวาน", now),
379363
)
380364
self.assertEqual(
381365
now + timedelta(days=-2), thaiword_to_date("วานซืน", now)
@@ -554,16 +538,14 @@ def test_emoji_to_thai(self):
554538
emoji_to_thai(
555539
"จะมานั่งรถเมล์เหมือนผมก็ได้นะครับ ใกล้ชิดประชาชนดี 😀"
556540
),
557-
(
558-
"จะมานั่งรถเมล์เหมือนผมก็ได้นะครับ "
559-
"ใกล้ชิดประชาชนดี :หน้ายิ้มยิงฟัน:"
560-
),
541+
("จะมานั่งรถเมล์เหมือนผมก็ได้นะครับ "
542+
"ใกล้ชิดประชาชนดี :หน้ายิ้มยิงฟัน:")
561543
)
562544
self.assertEqual(
563545
emoji_to_thai("หิวข้าวอยากกินอาหารญี่ปุ่น 🍣"),
564-
"หิวข้าวอยากกินอาหารญี่ปุ่น :ซูชิ:",
546+
"หิวข้าวอยากกินอาหารญี่ปุ่น :ซูชิ:"
565547
)
566548
self.assertEqual(
567549
emoji_to_thai("🇹🇭 นี่คิือธงประเทศไทย"),
568-
":ธง_ไทย: นี่คิือธงประเทศไทย",
550+
":ธง_ไทย: นี่คิือธงประเทศไทย"
569551
)

0 commit comments

Comments
 (0)