|  | 
| 1 | 1 | # -*- coding: utf-8 -*- | 
| 2 | 2 | """ | 
| 3 |  | -Functions related to keyboard layout. | 
|  | 3 | +Correct text in one language that is incorrectly-typed | 
|  | 4 | +with a keyboard layout in another language. | 
| 4 | 5 | """ | 
| 5 |  | - | 
| 6 | 6 | EN_TH_KEYB_PAIRS = { | 
| 7 | 7 |     "Z": "(", | 
| 8 | 8 |     "z": "ผ", | 
|  | 
# Translation tables built once at import time from the key-pair
# dictionaries, in the form expected by str.translate().
# EN_TH_KEYB_PAIRS maps a character typed on the English layout to the
# character on the same key of the Thai layout; TH_EN_KEYB_PAIRS is
# presumably the reverse mapping (defined elsewhere -- verify).
EN_TH_TRANSLATE_TABLE = str.maketrans(EN_TH_KEYB_PAIRS)
TH_EN_TRANSLATE_TABLE = str.maketrans(TH_EN_KEYB_PAIRS)
| 105 | 105 | 
 | 
# Key characters of the Thai keyboard layout used by thai_keyboard_dist()
# (presumably the layer typed without Shift). Each inner list is one
# keyboard row; a character's (row index, column index) in this table is
# used as its coordinate when computing key distance.
# NOTE(review): the first row has 14 entries (one of them an empty string)
# while the first row of TIS_820_2531_MOD_SHIFT has 13, so the columns of
# the two tables may not line up on that row -- confirm against the layout.
TIS_820_2531_MOD = [
  ["-", "ๅ", "/", "", "_", "ภ", "ถ", "ุ", "ึ", "ค", "ต", "จ", "ข", "ช"],
  ["ๆ", "ไ", "ำ", "พ", "ะ", "ั", "ี", "ร", "น", "ย", "บ", "ล", "ฃ"],
  ["ฟ", "ห", "ก", "ด", "เ", "้", "่", "า", "ส", "ว", "ง"],
  ["ผ", "ป", "แ", "อ", "ิ", "ื", "ท", "ม", "ใ", "ฝ"],
]
# Second layer of the same layout (presumably the characters typed with
# Shift held), using the same row/column coordinate scheme.
# thai_keyboard_dist() treats two different characters that end up with the
# same coordinate as a shifted pair.
TIS_820_2531_MOD_SHIFT = [
  ["%", "+", "๑", "๒", "๓", "๔", "ู", "฿", "๕", "๖", "๗", "๘", "๙"],
  ["๐", "\"", "ฎ", "ฑ", "ธ", "ํ", "๊", "ณ", "ฯ", "ญ", "ฐ", ",", "ฅ"],
  ["ฤ", "ฆ", "ฏ", "โ", "ฌ", "็", "๋", "ษ", "ศ", "ซ", "."],
  ["(", ")", "ฉ", "ฮ", "ฺ", "์", "?", "ฒ", "ฬ", "ฦ"],
]
| 118 |  | - | 
| 119 | 106 | 
 | 
| 120 | 107 | def eng_to_thai(text: str) -> str: | 
| 121 | 108 |     """ | 
| @@ -161,63 +148,3 @@ def thai_to_eng(text: str) -> str: | 
| 161 | 148 |         # output: 'Bank of Thailand' | 
| 162 | 149 |     """ | 
| 163 | 150 |     return text.translate(TH_EN_TRANSLATE_TABLE) | 
| 164 |  | - | 
| 165 |  | - | 
def thai_keyboard_dist(c1: str, c2: str, shift_dist: float = 0.0) -> float:
    """
    Calculate euclidean distance between two Thai characters
    according to their location on a Thai keyboard layout.

    A modified TIS 820-2531 standard keyboard layout, which is developed
    from Kedmanee layout and is the most commonly used Thai keyboard layout,
    is used in distance calculation.

    The modified TIS 820-2531 is TIS 820-2531 with a few key extensions
    proposed in the TIS 820-2536 draft. See Figure 4, notice grey keys, in
    https://www.nectec.or.th/it-standards/keyboard_layout/thai-key.html

    Note that the latest TIS 820-2538 has slight changes in layout from
    TIS 820-2531. See Figure 2, notice the Thai Baht sign and ฅ-ฃ pair, in
    https://www.nectec.or.th/it-standards/std820/std820.html
    Since TIS 820-2538 is not widely adopted by keyboard manufacturers,
    this function uses the de facto standard modified TIS 820-2531 instead.

    :param str c1: first character
    :param str c2: second character
    :param float shift_dist: value returned when the two characters are
        different but occupy the same (row, column) position, i.e. they
        differ only by the layout layer they are found in
    :return: euclidean distance between two characters
    :rtype: float
    :raises ValueError: if a character is empty or is not found in the
        keyboard layout tables

    :Example:

        from pythainlp.util import thai_keyboard_dist
        thai_keyboard_dist("ด", "ะ")
        # output: 1.4142135623730951
        thai_keyboard_dist("ฟ", "ฤ")
        # output: 0.0
        thai_keyboard_dist("ฟ", "ห")
        # output: 1.0
        thai_keyboard_dist("ฟ", "ก")
        # output: 2.0
        thai_keyboard_dist("ฟ", "ฤ", 0.5)
        # output: 0.5
    """
    def get_char_coord(ch: str):
        # The layout tables contain an empty-string cell; an empty input
        # must not be matched against it, so it is rejected like any other
        # character that has no key.
        if ch:
            # Search the base table first, then the shift table.
            for layout in (TIS_820_2531_MOD, TIS_820_2531_MOD_SHIFT):
                for row_no, row in enumerate(layout):
                    if ch in row:
                        return (row_no, row.index(ch))
        raise ValueError(ch + " not found in given keyboard layout")

    row1, col1 = get_char_coord(c1)
    row2, col2 = get_char_coord(c2)
    distance = ((row1 - row2) ** 2 + (col1 - col2) ** 2) ** (0.5)
    # Same position but different characters: the pair shares a key and
    # differs only by layer, so the caller-chosen shift distance applies.
    if distance == 0 and c1 != c2:
        return shift_dist
    return distance
0 commit comments