From 830edab5b2356d97c0427b5339caf6c4c305e9db Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Thu, 17 Sep 2020 08:51:09 +0100 Subject: [PATCH] Fix remove_repeat_vowels() bug that removes spaces between vowels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was mistakenly converting strings like "ะ า" to "ะา". --- pythainlp/util/normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py index da1b9bc46..441182d21 100644 --- a/pythainlp/util/normalize.py +++ b/pythainlp/util/normalize.py @@ -38,7 +38,7 @@ f"{follow_v}{lead_v}{above_v}{below_v}\u0e3a\u0e4c\u0e4d\u0e4e" ) _NOREPEAT_PAIRS = list( - zip([f"({ch}[ ]*)+" for ch in _NOREPEAT_CHARS], _NOREPEAT_CHARS) + zip([f"({ch}[ ]*)+{ch}" for ch in _NOREPEAT_CHARS], _NOREPEAT_CHARS) ) _RE_TONEMARKS = re.compile(f"[{tonemarks}]+")