From 2899ad8d58d767f67b6cbfc5ebdda5ac1475d77f Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Wed, 13 Dec 2023 00:50:33 +0700 Subject: [PATCH 1/6] Add pythainlp.util.morse --- docs/api/util.rst | 10 ++ pythainlp/util/morse.py | 209 ++++++++++++++++++++++++++++++++++++++++ tests/test_util.py | 9 ++ 3 files changed, 228 insertions(+) create mode 100644 pythainlp/util/morse.py diff --git a/docs/api/util.rst b/docs/api/util.rst index bb7efbfd3..063fd1ab1 100644 --- a/docs/api/util.rst +++ b/docs/api/util.rst @@ -277,3 +277,13 @@ Modules :members: The `Trie` class is a data structure for efficient dictionary operations. It's a valuable resource for managing and searching word lists and dictionaries in a structured and efficient manner. + +.. autofunction:: pythainlp.util.morse.morse_encode + :noindex: + + The `pythainlp.util.morse.morse_encode` function converts text to Morse code. + +.. autofunction:: pythainlp.util.morse.morse_decode + :noindex: + + The `pythainlp.util.morse.morse_decode` function converts Morse code to text. 
diff --git a/pythainlp/util/morse.py b/pythainlp/util/morse.py new file mode 100644 index 000000000..d32c66cd7 --- /dev/null +++ b/pythainlp/util/morse.py @@ -0,0 +1,209 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 +THAI_MORSE_CODE = { + 'ก': '--.', + 'ข': '-.-.', + 'ค': '-.-', + 'ฆ': '-.-', + 'ง': '-.--.', + 'จ': '-..-.', + 'ฉ': '----', + 'ช': '-..-', + 'ฌ':'-..-', + 'ซ': '--..', + 'ญ': '.---', + 'ด': '-..', + 'ถ': '-.-..', + 'ฐ': '-.-..', + 'ฑ': '-..--', + 'ฒ': '-..--', + 'ท': '-..--', + 'ธ': '-..--', + 'ณ': '-.', + 'น': '-.', + 'บ': '-...', + 'ป': '.--.', + 'ผ': '--.-', + 'ฝ': '-.-.-', + 'พ': '.--..', + 'ภ': '.--..', + 'ฟ': '..-.', + 'ม': '--', + 'ย': '-.--', + 'ร': '.-.', + 'ล': '.-..', + 'ฬ': '.-..', + 'ว': '.--', + 'ศ': '...', + 'ษ': '...', + 'ส': '...', + 'ห': '....', + 'อ': '-...-', + 'ฮ': '--.--', + 'ฎ': '-..', + 'ต': '-', + 'ฏ': '-', + 'ฤ': '.-.--', + '่': '..-', + '้': '...-', + '๊': '--...', + '๋': '.-.-.', + 'ั': '.--.-', + '็': '---..', + '์': '--..-', + 'ั้': '.---.', + 'ฯ': '--.-.', + 'ฯลฯ': '---.-', + 'ๆ': '---.-', + 'ะ': '.-...', + 'า': '.-', + 'ิ': '..-..', + 'ี': '..', + 'ึ': '..--.', + 'ื': '..--', + 'ุ': '..-.-', + 'ู': '---.', + 'เ': '.', + 'แ': '.-.-', + 'โ': '---', + 'ไ': '.-..-', + 'ใ': '.-..-', + 'ำ': '...-.', + 'อ': '-...-' +} +ENGLISH_MORSE_CODE = { + 'A': '.-', + 'B': '-...', + 'C': '-.-.', + 'D': '-..', + 'E': '.', + 'F': '..-.', + 'G': '--.', + 'H': '....', + 'I': '..', + 'J': '.---', + 'K': '-.-', + 'L': '.-..', + 'M': '--', + 'N': '-.', + 'O': '---', + 'P': '.--.', + 'Q': '--.-', + 'R': '.-.', + 'S': '...', + 'T': '-', + 'U': '..-', + 'V': '...-', + 'W': '.--', + 'X': '-..-', + 'Y': '-.--', + 'Z': '--..', + '0': '-----', + ',': '--..--', + '1': '.----', + '.': '.-.-.-', + '2': '..---', + '?': '..--..', + '3': '...--', + ';': '-.-.-.', + '4': '....-', + ':': '---...', + '5': '.....', + "'": '.----.', + '6': '-....', + '-': '-....-', + '7': 
'--...', + '/': '-..-.', + '8': '---..', + '(': '-.--.-' +} + +decodingeng = {} #สร้าง Dictionary สำหรับใช้ถอดรหัสมอร์สภาษาอังกฤษ +for key, val in ENGLISH_MORSE_CODE.items(): + decodingeng[val] = key +decodingthai = {} +for key, val in THAI_MORSE_CODE.items(): + decodingthai[val.replace(" ","")] = key +for key, val in THAI_MORSE_CODE.items(): + THAI_MORSE_CODE[key] = val.replace(" ","") + + +def morse_encode(text: str, lang: str="th") -> str: + """ + Convert text to Morse code (support Thai and English) + + :param str text: Text + :param str lang: Language Code (*th* is Thai and *en* is English) + :return: Morse code + :rtype: str + + :Example: + :: + from pythainlp.util.morse import morse_encode + print(morse_encode("แมว", lang="th")) + # output: .-.- -- .-- + + print(morse_encode("cat", lang="en")) + # output: -.-. .- - + """ + if lang == "th": # Thai + return ' '.join( + map( + lambda x, + g=THAI_MORSE_CODE.get: g(x, ' '), + text.upper() + ) + ) + elif lang == "en": # English + return ' '.join( + map( + lambda x, + g=ENGLISH_MORSE_CODE.get: g(x, ' '), + text.upper() + ) + ) + else: + raise NotImplementedError(f"This function doesn't support {lang}.") + + +def morse_decode(morse_text: str, lang: str="th") -> str: + """ + Simple Convert Morse code to text + + Thai still have some wrong character problem that\ + can fix by spell corrector. + + :param str morse_text: Morse code + :param str lang: Language Code (*th* is Thai and *en* is English) + :return: Text + :rtype: str + + :Example: + :: + from pythainlp.util.morse import morse_decode + print(morse_decode(".-.- -- .--", lang="th")) + # output: แมว + + print(morse_decode("-.-. 
.- -", lang="en")) + # output: CAT + """ + if lang == "th": + ans = ''.join( + map( + lambda x, + g=decodingthai.get: g(x, ''), + morse_text.split(' ')) + ) + return ''.join(ans.split()) + elif lang == "en": + ans = ''.join( + map( + lambda x, + g=decodingeng.get: g(x, ' '), + morse_text.split(' ') + ) + ) + return ' '.join(ans.split()) + else: + raise NotImplementedError(f"This function doesn't support {lang}.") diff --git a/tests/test_util.py b/tests/test_util.py index 9d821ff01..81a2e0284 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -67,6 +67,7 @@ remove_trailing_repeat_consonants, ) from pythainlp.util.spell_words import spell_word +from pythainlp.util.morse import morse_encode, morse_decode class TestUtilPackage(unittest.TestCase): @@ -835,5 +836,13 @@ def test_remove_repeat_consonants(self): "อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ", ) + def test_morse_encode(self): + self.assertEqual(morse_encode("แมว", lang="th"), ".-.- -- .--") + self.assertEqual(morse_encode("cat", lang="en"), "-.-. .- -") + + def test_morse_decode(self): + self.assertEqual(morse_decode(".-.- -- .--", lang="th"), "แมว") + self.assertEqual(morse_decode("-.-. 
.- -", lang="en"), "CAT") + # def test_abbreviation_to_full_text(self): # self.assertIsInstance(abbreviation_to_full_text("รร.ของเราน่าอยู่", list)) From 404e402b58506b13866ae8069b123fd62b717ced Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Wed, 13 Dec 2023 00:57:40 +0700 Subject: [PATCH 2/6] Update morse.py --- pythainlp/util/morse.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pythainlp/util/morse.py b/pythainlp/util/morse.py index d32c66cd7..d6b3b457b 100644 --- a/pythainlp/util/morse.py +++ b/pythainlp/util/morse.py @@ -10,7 +10,7 @@ 'จ': '-..-.', 'ฉ': '----', 'ช': '-..-', - 'ฌ':'-..-', + 'ฌ': '-..-', 'ซ': '--..', 'ญ': '.---', 'ด': '-..', @@ -119,7 +119,7 @@ '(': '-.--.-' } -decodingeng = {} #สร้าง Dictionary สำหรับใช้ถอดรหัสมอร์สภาษาอังกฤษ +decodingeng = {} for key, val in ENGLISH_MORSE_CODE.items(): decodingeng[val] = key decodingthai = {} @@ -129,7 +129,7 @@ THAI_MORSE_CODE[key] = val.replace(" ","") -def morse_encode(text: str, lang: str="th") -> str: +def morse_encode(text: str, lang: str = "th") -> str: """ Convert text to Morse code (support Thai and English) @@ -167,7 +167,7 @@ def morse_encode(text: str, lang: str="th") -> str: raise NotImplementedError(f"This function doesn't support {lang}.") -def morse_decode(morse_text: str, lang: str="th") -> str: +def morse_decode(morse_text: str, lang: str = "th") -> str: """ Simple Convert Morse code to text From 36e03381337981c698e30e95f89d632fe2aa0d55 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Wed, 13 Dec 2023 00:58:34 +0700 Subject: [PATCH 3/6] Fixed pep8 --- pythainlp/util/morse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pythainlp/util/morse.py b/pythainlp/util/morse.py index d6b3b457b..fdb5eba64 100644 --- a/pythainlp/util/morse.py +++ b/pythainlp/util/morse.py @@ -124,9 +124,9 @@ decodingeng[val] = key decodingthai = {} for key, val in THAI_MORSE_CODE.items(): - decodingthai[val.replace(" ","")] = key + 
decodingthai[val.replace(" ", "")] = key for key, val in THAI_MORSE_CODE.items(): - THAI_MORSE_CODE[key] = val.replace(" ","") + THAI_MORSE_CODE[key] = val.replace(" ", "") def morse_encode(text: str, lang: str = "th") -> str: From 0fd8bd8645ff9f210d35737318f6c5782838a409 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Wed, 13 Dec 2023 01:02:39 +0700 Subject: [PATCH 4/6] Update morse.py --- pythainlp/util/morse.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pythainlp/util/morse.py b/pythainlp/util/morse.py index fdb5eba64..22c88dce5 100644 --- a/pythainlp/util/morse.py +++ b/pythainlp/util/morse.py @@ -39,7 +39,6 @@ 'ษ': '...', 'ส': '...', 'ห': '....', - 'อ': '-...-', 'ฮ': '--.--', 'ฎ': '-..', 'ต': '-', From 21d0a2b1592c862e52b619af05e99c6283bdb12e Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Wed, 13 Dec 2023 07:57:34 +0000 Subject: [PATCH 5/6] Update morse.py - formatting --- pythainlp/util/morse.py | 263 +++++++++++++++++++--------------------- 1 file changed, 126 insertions(+), 137 deletions(-) diff --git a/pythainlp/util/morse.py b/pythainlp/util/morse.py index 22c88dce5..286edcb6a 100644 --- a/pythainlp/util/morse.py +++ b/pythainlp/util/morse.py @@ -1,129 +1,133 @@ # -*- coding: utf-8 -*- # SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 + THAI_MORSE_CODE = { - 'ก': '--.', - 'ข': '-.-.', - 'ค': '-.-', - 'ฆ': '-.-', - 'ง': '-.--.', - 'จ': '-..-.', - 'ฉ': '----', - 'ช': '-..-', - 'ฌ': '-..-', - 'ซ': '--..', - 'ญ': '.---', - 'ด': '-..', - 'ถ': '-.-..', - 'ฐ': '-.-..', - 'ฑ': '-..--', - 'ฒ': '-..--', - 'ท': '-..--', - 'ธ': '-..--', - 'ณ': '-.', - 'น': '-.', - 'บ': '-...', - 'ป': '.--.', - 'ผ': '--.-', - 'ฝ': '-.-.-', - 'พ': '.--..', - 'ภ': '.--..', - 'ฟ': '..-.', - 'ม': '--', - 'ย': '-.--', - 'ร': '.-.', - 'ล': '.-..', - 'ฬ': '.-..', - 'ว': '.--', - 'ศ': '...', - 'ษ': '...', - 'ส': '...', - 'ห': '....', - 'ฮ': '--.--', - 'ฎ': '-..', - 'ต': '-', - 'ฏ': '-', - 'ฤ': '.-.--', - 
'่': '..-', - '้': '...-', - '๊': '--...', - '๋': '.-.-.', - 'ั': '.--.-', - '็': '---..', - '์': '--..-', - 'ั้': '.---.', - 'ฯ': '--.-.', - 'ฯลฯ': '---.-', - 'ๆ': '---.-', - 'ะ': '.-...', - 'า': '.-', - 'ิ': '..-..', - 'ี': '..', - 'ึ': '..--.', - 'ื': '..--', - 'ุ': '..-.-', - 'ู': '---.', - 'เ': '.', - 'แ': '.-.-', - 'โ': '---', - 'ไ': '.-..-', - 'ใ': '.-..-', - 'ำ': '...-.', - 'อ': '-...-' + "ก": "--.", + "ข": "-.-.", + "ค": "-.-", + "ฆ": "-.-", + "ง": "-.--.", + "จ": "-..-.", + "ฉ": "----", + "ช": "-..-", + "ฌ": "-..-", + "ซ": "--..", + "ญ": ".---", + "ด": "-..", + "ถ": "-.-..", + "ฐ": "-.-..", + "ฑ": "-..--", + "ฒ": "-..--", + "ท": "-..--", + "ธ": "-..--", + "ณ": "-.", + "น": "-.", + "บ": "-...", + "ป": ".--.", + "ผ": "--.-", + "ฝ": "-.-.-", + "พ": ".--..", + "ภ": ".--..", + "ฟ": "..-.", + "ม": "--", + "ย": "-.--", + "ร": ".-.", + "ล": ".-..", + "ฬ": ".-..", + "ว": ".--", + "ศ": "...", + "ษ": "...", + "ส": "...", + "ห": "....", + "ฮ": "--.--", + "ฎ": "-..", + "ต": "-", + "ฏ": "-", + "ฤ": ".-.--", + "่": "..-", + "้": "...-", + "๊": "--...", + "๋": ".-.-.", + "ั": ".--.-", + "็": "---..", + "์": "--..-", + "ั้": ".---.", + "ฯ": "--.-.", + "ฯลฯ": "---.-", + "ๆ": "---.-", + "ะ": ".-...", + "า": ".-", + "ิ": "..-..", + "ี": "..", + "ึ": "..--.", + "ื": "..--", + "ุ": "..-.-", + "ู": "---.", + "เ": ".", + "แ": ".-.-", + "โ": "---", + "ไ": ".-..-", + "ใ": ".-..-", + "ำ": "...-.", + "อ": "-...-", } + ENGLISH_MORSE_CODE = { - 'A': '.-', - 'B': '-...', - 'C': '-.-.', - 'D': '-..', - 'E': '.', - 'F': '..-.', - 'G': '--.', - 'H': '....', - 'I': '..', - 'J': '.---', - 'K': '-.-', - 'L': '.-..', - 'M': '--', - 'N': '-.', - 'O': '---', - 'P': '.--.', - 'Q': '--.-', - 'R': '.-.', - 'S': '...', - 'T': '-', - 'U': '..-', - 'V': '...-', - 'W': '.--', - 'X': '-..-', - 'Y': '-.--', - 'Z': '--..', - '0': '-----', - ',': '--..--', - '1': '.----', - '.': '.-.-.-', - '2': '..---', - '?': '..--..', - '3': '...--', - ';': '-.-.-.', - '4': '....-', - ':': '---...', - '5': '.....', - 
"'": '.----.', - '6': '-....', - '-': '-....-', - '7': '--...', - '/': '-..-.', - '8': '---..', - '(': '-.--.-' + "A": ".-", + "B": "-...", + "C": "-.-.", + "D": "-..", + "E": ".", + "F": "..-.", + "G": "--.", + "H": "....", + "I": "..", + "J": ".---", + "K": "-.-", + "L": ".-..", + "M": "--", + "N": "-.", + "O": "---", + "P": ".--.", + "Q": "--.-", + "R": ".-.", + "S": "...", + "T": "-", + "U": "..-", + "V": "...-", + "W": ".--", + "X": "-..-", + "Y": "-.--", + "Z": "--..", + "0": "-----", + ",": "--..--", + "1": ".----", + ".": ".-.-.-", + "2": "..---", + "?": "..--..", + "3": "...--", + ";": "-.-.-.", + "4": "....-", + ":": "---...", + "5": ".....", + "'": ".----.", + "6": "-....", + "-": "-....-", + "7": "--...", + "/": "-..-.", + "8": "---..", + "(": "-.--.-", } decodingeng = {} for key, val in ENGLISH_MORSE_CODE.items(): decodingeng[val] = key + decodingthai = {} for key, val in THAI_MORSE_CODE.items(): decodingthai[val.replace(" ", "")] = key + for key, val in THAI_MORSE_CODE.items(): THAI_MORSE_CODE[key] = val.replace(" ", "") @@ -147,20 +151,12 @@ def morse_encode(text: str, lang: str = "th") -> str: # output: -.-. 
.- - """ if lang == "th": # Thai - return ' '.join( - map( - lambda x, - g=THAI_MORSE_CODE.get: g(x, ' '), - text.upper() - ) + return " ".join( + map(lambda x, g=THAI_MORSE_CODE.get: g(x, " "), text.upper()) ) elif lang == "en": # English - return ' '.join( - map( - lambda x, - g=ENGLISH_MORSE_CODE.get: g(x, ' '), - text.upper() - ) + return " ".join( + map(lambda x, g=ENGLISH_MORSE_CODE.get: g(x, " "), text.upper()) ) else: raise NotImplementedError(f"This function doesn't support {lang}.") @@ -188,21 +184,14 @@ def morse_decode(morse_text: str, lang: str = "th") -> str: # output: CAT """ if lang == "th": - ans = ''.join( - map( - lambda x, - g=decodingthai.get: g(x, ''), - morse_text.split(' ')) + ans = "".join( + map(lambda x, g=decodingthai.get: g(x, ""), morse_text.split(" ")) ) - return ''.join(ans.split()) + return "".join(ans.split()) elif lang == "en": - ans = ''.join( - map( - lambda x, - g=decodingeng.get: g(x, ' '), - morse_text.split(' ') - ) + ans = "".join( + map(lambda x, g=decodingeng.get: g(x, " "), morse_text.split(" ")) ) - return ' '.join(ans.split()) + return " ".join(ans.split()) else: raise NotImplementedError(f"This function doesn't support {lang}.") From 5079d3f7daae29e3fdcb018a7645897e6d4ffeb6 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Wed, 13 Dec 2023 08:05:10 +0000 Subject: [PATCH 6/6] Sort import test_util.py --- tests/test_util.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 81a2e0284..9fc8d6cf1 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -18,14 +18,16 @@ arabic_digit_to_thai_digit, bahttext, collate, - countthai, + convert_years, count_thai_chars, + countthai, dict_trie, display_thai_char, digit_to_text, emoji_to_thai, eng_to_thai, find_keyword, + ipa_to_rtgs, is_native_thai, isthai, isthaichar, @@ -33,41 +35,39 @@ now_reign_year, num_to_thaiword, maiyamok, + nectec_to_ipa, rank, reign_year_to_ad, 
remove_dangling, remove_dup_spaces, + remove_tone_ipa, remove_tonemark, + remove_trailing_repeat_consonants, remove_zw, rhyme, text_to_arabic_digit, + text_to_num, text_to_thai_digit, - thaiword_to_date, thai_digit_to_arabic_digit, + thai_keyboard_dist, + thai_to_eng, thai_strftime, + thai_strptime, + thai_word_tone_detector, + thaiword_to_date, + thaiword_to_num, thaiword_to_time, time_to_thaiword, - thai_to_eng, + tis620_to_utf8, to_idna, - thaiword_to_num, - thai_keyboard_dist, - text_to_num, - words_to_num, + tone_detector, sound_syllable, syllable_length, syllable_open_close_detector, - tone_detector, - thai_word_tone_detector, - convert_years, - thai_strptime, - nectec_to_ipa, - ipa_to_rtgs, - remove_tone_ipa, - tis620_to_utf8, - remove_trailing_repeat_consonants, + words_to_num, ) +from pythainlp.util.morse import morse_decode, morse_encode from pythainlp.util.spell_words import spell_word -from pythainlp.util.morse import morse_encode, morse_decode class TestUtilPackage(unittest.TestCase):