Skip to content
7 changes: 6 additions & 1 deletion pythainlp/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"text_to_arabic_digit",
"text_to_thai_digit",
"thai_digit_to_arabic_digit",
"thai_keyboard_dist",
"thai_strftime",
"thai_time",
"thai_to_eng",
Expand All @@ -54,7 +55,11 @@
text_to_thai_digit,
thai_digit_to_arabic_digit,
)
from pythainlp.util.keyboard import eng_to_thai, thai_to_eng
from pythainlp.util.keyboard import (
eng_to_thai,
thai_keyboard_dist,
thai_to_eng,
)
from pythainlp.util.keywords import find_keyword, rank
from pythainlp.util.normalize import (
delete_tone,
Expand Down
53 changes: 53 additions & 0 deletions pythainlp/util/keyboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,20 @@
# Translation tables for str.translate(), built once at import time from the
# EN_TH_KEYB_PAIRS / TH_EN_KEYB_PAIRS mappings (str.maketrans with a single
# argument requires a dict mapping characters to their replacements).
EN_TH_TRANSLATE_TABLE = str.maketrans(EN_TH_KEYB_PAIRS)
# Used by thai_to_eng() to convert Thai-layout keystrokes to English.
TH_EN_TRANSLATE_TABLE = str.maketrans(TH_EN_KEYB_PAIRS)

# Characters of the unshifted keyboard layer, stored as one flat list of
# single-character strings (presumably the TIS 820-2538 Thai layout, going by
# the name -- confirm against the standard).
# thai_keyboard_dist() locates a character by its index in this list, so the
# order of the entries is significant. The entry at a given index here and the
# entry at the same index in TIS_820_2538_SHIFT are treated as the same key.
# NOTE(review): the line breaks below look like physical keyboard rows, but
# the list itself is flat, so that row structure is not visible to the code.
TIS_820_2538 = [
"-", "ๅ", "/", "_", "ภ", "ถ", "ุ", "ึ", "ค", "ต", "จ", "ข", "ช",
"ๆ", "ไ", "ำ", "พ", "ะ", "ั", "ี", "ร", "น", "ย", "บ", "ล", "ฃ",
"ฟ", "ห", "ก", "ด", "เ", "้", "่", "า", "ส", "ว", "ง",
"ผ", "ป", "แ", "อ", "ิ", "ื", "ท", "ม", "ใ", "ฝ"
]

# Characters of the shifted keyboard layer, as one flat list of
# single-character strings (presumably the Shift layer of the TIS 820-2538
# Thai layout -- confirm against the standard).
# Entries are index-aligned with TIS_820_2538: thai_keyboard_dist() gives a
# character here the same position as the entry at the same index in the
# unshifted list, which is how it detects "same key, different shift state".
# Keep both lists the same length and in matching order.
TIS_820_2538_SHIFT = [
"%", "+", "๑", "๒", "๓", "๔", "ู", "฿", "๕", "๖", "๗", "๘", "๙",
"๐", "\"", "ฎ", "ฑ", "ธ", "ํ", "๊", "ณ", "ฯ", "ญ", "ฐ", ",", "ฅ",
"ฤ", "ฆ", "ฏ", "โ", "ฌ", "็", "๋", "ษ", "ศ", "ซ", ".",
"(", ")", "ฉ", "ฮ", "ฺ", "์", "?", "ฒ", "ฬ", "ฦ"
]


def eng_to_thai(text: str) -> str:
"""
Expand Down Expand Up @@ -148,3 +162,42 @@ def thai_to_eng(text: str) -> str:
# output: 'Bank of Thailand'
"""
return text.translate(TH_EN_TRANSLATE_TABLE)


def thai_keyboard_dist(c1: str, c2: str, shift_dist: float = 0.0) -> float:
    """
    Calculate euclidean distance between two Thai characters
    according to their position in the keyboard layout tables.

    Each character is looked up first in TIS_820_2538 and then in
    TIS_820_2538_SHIFT. Its coordinate is the pair
    (index of the table entry that contains it, offset inside that entry).
    As the tables are currently defined (flat lists of single characters),
    the offset is always 0, so the result is the absolute difference of the
    two table indices.

    :param str c1: first character
    :param str c2: second character
    :param float shift_dist: value returned when the two characters differ
        but resolve to the same position, i.e. one is the shifted variant
        of the other
    :return: euclidean distance between two characters
    :rtype: float
    :raises ValueError: if a character is empty or is not found in any of
        the layout tables

    :Example:
    ::

        from pythainlp.util import thai_keyboard_dist

        thai_keyboard_dist("ฟ", "ฤ")
        # output: 0.0

        thai_keyboard_dist("ฟ", "ห")
        # output: 1.0

        thai_keyboard_dist("ฟ", "ก")
        # output: 2.0

        thai_keyboard_dist("ฟ", "ฤ", 0.5)
        # output: 0.5
    """
    # Bound once as an immutable tuple instead of a mutable default argument.
    layouts = (TIS_820_2538, TIS_820_2538_SHIFT)

    def get_char_coord(ch: str):
        # An empty string is "contained" in every string entry, so without
        # this guard it would silently resolve to position (0, 0).
        if ch:
            for layout in layouts:
                for r, row in enumerate(layout):
                    if ch in row:
                        return (r, row.index(ch))
        # Report the character that was searched for. The previous code
        # referenced the column variable here, which is unbound on this path
        # and raised UnboundLocalError instead of ValueError.
        raise ValueError(ch + " not found in given keyboard layout")

    coord1 = get_char_coord(c1)
    coord2 = get_char_coord(c2)
    distance = (
        (coord1[0] - coord2[0]) ** 2 + (coord1[1] - coord2[1]) ** 2
    ) ** 0.5
    # Same position but different characters: they differ only by shift state.
    if distance == 0 and c1 != c2:
        return shift_dist
    return distance
9 changes: 9 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
time_to_thaiword,
thai_to_eng,
thaiword_to_num,
thai_keyboard_dist,
)


Expand Down Expand Up @@ -156,6 +157,14 @@ def test_rank(self):
rank(["แมว", "คน", "แมว"], exclude_stopwords=True)
)

# ### pythainlp.util.keyboard

def test_thai_keyboard_dist(self):
    """Check thai_keyboard_dist for shifted, adjacent and two-apart keys."""
    # (first char, second char, extra positional args, expected distance)
    expectations = (
        ("ฟ", "ฤ", (), 0.0),
        ("ฟ", "ห", (), 1.0),
        ("ฟ", "ก", (), 2.0),
        ("ฟ", "ฤ", (0.5,), 0.5),
    )
    for first, second, extra, wanted in expectations:
        self.assertEqual(thai_keyboard_dist(first, second, *extra), wanted)

# ### pythainlp.util.date

def test_date(self):
Expand Down