# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
"""
Morse code encoder and decoder for Thai and English text.
"""

# Thai symbol -> Morse code.
# Several symbols share one code (for example "ค"/"ฆ" or "ศ"/"ษ"/"ส"), so
# this table cannot be inverted without losing information.
# Two keys ("ั้" and "ฯลฯ") are longer than one character.
THAI_MORSE_CODE = {
    "ก": "--.",
    "ข": "-.-.",
    "ค": "-.-",
    "ฆ": "-.-",
    "ง": "-.--.",
    "จ": "-..-.",
    "ฉ": "----",
    "ช": "-..-",
    "ฌ": "-..-",
    "ซ": "--..",
    "ญ": ".---",
    "ด": "-..",
    "ถ": "-.-..",
    "ฐ": "-.-..",
    "ฑ": "-..--",
    "ฒ": "-..--",
    "ท": "-..--",
    "ธ": "-..--",
    "ณ": "-.",
    "น": "-.",
    "บ": "-...",
    "ป": ".--.",
    "ผ": "--.-",
    "ฝ": "-.-.-",
    "พ": ".--..",
    "ภ": ".--..",
    "ฟ": "..-.",
    "ม": "--",
    "ย": "-.--",
    "ร": ".-.",
    "ล": ".-..",
    "ฬ": ".-..",
    "ว": ".--",
    "ศ": "...",
    "ษ": "...",
    "ส": "...",
    "ห": "....",
    "ฮ": "--.--",
    "ฎ": "-..",
    "ต": "-",
    "ฏ": "-",
    "ฤ": ".-.--",
    "่": "..-",
    "้": "...-",
    "๊": "--...",
    "๋": ".-.-.",
    "ั": ".--.-",
    "็": "---..",
    "์": "--..-",
    "ั้": ".---.",
    "ฯ": "--.-.",
    "ฯลฯ": "---.-",
    # NOTE(review): "ๆ" shares its code with "ฯลฯ", so "---.-" always
    # decodes to "ๆ" -- confirm against a Thai Morse code reference.
    "ๆ": "---.-",
    "ะ": ".-...",
    "า": ".-",
    "ิ": "..-..",
    "ี": "..",
    "ึ": "..--.",
    "ื": "..--",
    "ุ": "..-.-",
    "ู": "---.",
    "เ": ".",
    "แ": ".-.-",
    "โ": "---",
    "ไ": ".-..-",
    "ใ": ".-..-",
    "ำ": "...-.",
    "อ": "-...-",
}

# English (International Morse code) symbol -> Morse code.
ENGLISH_MORSE_CODE = {
    "A": ".-",
    "B": "-...",
    "C": "-.-.",
    "D": "-..",
    "E": ".",
    "F": "..-.",
    "G": "--.",
    "H": "....",
    "I": "..",
    "J": ".---",
    "K": "-.-",
    "L": ".-..",
    "M": "--",
    "N": "-.",
    "O": "---",
    "P": ".--.",
    "Q": "--.-",
    "R": ".-.",
    "S": "...",
    "T": "-",
    "U": "..-",
    "V": "...-",
    "W": ".--",
    "X": "-..-",
    "Y": "-.--",
    "Z": "--..",
    "0": "-----",
    "1": ".----",
    "2": "..---",
    "3": "...--",
    "4": "....-",
    "5": ".....",
    "6": "-....",
    "7": "--...",
    "8": "---..",
    "9": "----.",  # was missing: "9" used to be encoded as a blank
    ",": "--..--",
    ".": ".-.-.-",
    "?": "..--..",
    ";": "-.-.-.",
    ":": "---...",
    "'": ".----.",
    "-": "-....-",
    "/": "-..-.",
    # "(" used to carry the code of ")" and ")" was missing altogether.
    "(": "-.--.",
    ")": "-.--.-",
}

# Morse code -> symbol tables used by morse_decode().
# When several symbols share one code, the symbol defined last in the
# encoding table wins (for example "-.-" decodes to "ฆ", not "ค").
decodingeng = {code: symbol for symbol, code in ENGLISH_MORSE_CODE.items()}
decodingthai = {code: symbol for symbol, code in THAI_MORSE_CODE.items()}

# Length of the longest Thai key, so the encoder knows how far to look ahead.
_LONGEST_THAI_KEY = max(map(len, THAI_MORSE_CODE))


def _encode(text: str, table: dict, longest_key: int) -> str:
    """Encode *text* with *table*, preferring the longest matching key."""
    codes = []
    pos = 0
    while pos < len(text):
        # Try the longest candidate first so that multi-character keys
        # such as "ฯลฯ" are matched before their single characters.
        for size in range(min(longest_key, len(text) - pos), 0, -1):
            code = table.get(text[pos:pos + size])
            if code is not None:
                codes.append(code)
                pos += size
                break
        else:
            # Unknown symbol (including a plain space): emit a blank, which
            # shows up as a wider gap in the space-joined output.
            codes.append(" ")
            pos += 1
    return " ".join(codes)


def morse_encode(text: str, lang: str = "th") -> str:
    """
    Convert text to Morse code (supports Thai and English).

    Codes are separated by a single space. A character that has no Morse
    code, such as a space between words, is written as a blank, so it
    appears as a wider gap in the output.

    :param str text: text to encode
    :param str lang: language code (*th* for Thai, *en* for English)
    :return: Morse code
    :rtype: str
    :raises NotImplementedError: if *lang* is neither *th* nor *en*

    :Example:
    ::

        from pythainlp.util.morse import morse_encode

        print(morse_encode("แมว", lang="th"))
        # output: .-.- -- .--

        print(morse_encode("cat", lang="en"))
        # output: -.-. .- -
    """
    if lang == "th":  # Thai
        return _encode(text.upper(), THAI_MORSE_CODE, _LONGEST_THAI_KEY)
    if lang == "en":  # English
        return _encode(text.upper(), ENGLISH_MORSE_CODE, 1)
    raise NotImplementedError(f"This function doesn't support {lang}.")


def morse_decode(morse_text: str, lang: str = "th") -> str:
    """
    Convert Morse code to text (simple table lookup).

    Codes must be separated by a single space. Several Thai characters
    share one Morse code, so decoded Thai text may contain a wrong
    character; a spell corrector can repair the result.

    For Thai, unknown codes are dropped and the result contains no
    whitespace. For English, unknown codes and wider gaps become a single
    space between words.

    :param str morse_text: Morse code
    :param str lang: language code (*th* for Thai, *en* for English)
    :return: decoded text (upper case for English)
    :rtype: str
    :raises NotImplementedError: if *lang* is neither *th* nor *en*

    :Example:
    ::

        from pythainlp.util.morse import morse_decode

        print(morse_decode(".-.- -- .--", lang="th"))
        # output: แมว

        print(morse_decode("-.-. .- -", lang="en"))
        # output: CAT
    """
    if lang == "th":
        table, unknown, separator = decodingthai, "", ""
    elif lang == "en":
        table, unknown, separator = decodingeng, " ", " "
    else:
        raise NotImplementedError(f"This function doesn't support {lang}.")

    decoded = "".join(
        table.get(code, unknown) for code in morse_text.split(" ")
    )
    # Collapse the blanks produced by unknown codes and wide gaps.
    return separator.join(decoded.split())
def test_morse_encode(self):
    """morse_encode handles Thai and English and rejects other languages."""
    self.assertEqual(morse_encode("แมว", lang="th"), ".-.- -- .--")
    self.assertEqual(morse_encode("cat", lang="en"), "-.-. .- -")
    # Any language code other than "th"/"en" must be rejected explicitly.
    with self.assertRaises(NotImplementedError):
        morse_encode("cat", lang="xx")

def test_morse_decode(self):
    """morse_decode handles Thai and English and rejects other languages."""
    self.assertEqual(morse_decode(".-.- -- .--", lang="th"), "แมว")
    self.assertEqual(morse_decode("-.-. .- -", lang="en"), "CAT")
    # Any language code other than "th"/"en" must be rejected explicitly.
    with self.assertRaises(NotImplementedError):
        morse_decode("-.-. .- -", lang="xx")