From 283b70634d0f694fa8354e629aadaadcdd0db18f Mon Sep 17 00:00:00 2001 From: Korakot Chaovavanich Date: Fri, 9 Oct 2020 13:21:49 +0700 Subject: [PATCH 1/5] Update trie.py --- pythainlp/util/trie.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pythainlp/util/trie.py b/pythainlp/util/trie.py index fd06842e0..f793e2d84 100644 --- a/pythainlp/util/trie.py +++ b/pythainlp/util/trie.py @@ -40,6 +40,31 @@ def add(self, word: str) -> None: cur = child cur.end = True + def remove(self, word: str) -> None: + """ + Remove a word from the trie. + If the word is not found, do nothing. + + :param str text: a word + """ + # remove from set first + if word not in self.words: + return + self.words.remove(word) + # then remove from nodes + parent = self.root + threes = [] # track path to leaf + for ch in word: + child = parent.children[ch] + threes.append((parent, child, ch)) + parent = child + # remove the last one + child.end = False + # prune up the tree + for parent, child, ch in reversed(threes): + if child.end or child.children: break + del parent.children[ch] # remove from parent dict + def prefixes(self, text: str) -> List[str]: """ List all possible words from first sequence of characters in a word. From 8de1ee4a36ac1e62f6c6bad2a7b6306dcaf43480 Mon Sep 17 00:00:00 2001 From: Korakot Chaovavanich Date: Fri, 9 Oct 2020 13:27:04 +0700 Subject: [PATCH 2/5] move break to a line below. --- pythainlp/util/trie.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pythainlp/util/trie.py b/pythainlp/util/trie.py index f793e2d84..c9900c89d 100644 --- a/pythainlp/util/trie.py +++ b/pythainlp/util/trie.py @@ -62,7 +62,8 @@ def remove(self, word: str) -> None: child.end = False # prune up the tree for parent, child, ch in reversed(threes): - if child.end or child.children: break + if child.end or child.children: + break del parent.children[ch] # remove from parent dict def prefixes(self, text: str) -> List[str]: From e692720cad106e94dcf806bd4062493f151e8dfc Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Fri, 9 Oct 2020 10:06:48 +0100 Subject: [PATCH 3/5] Remove trailing space --- pythainlp/util/trie.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pythainlp/util/trie.py b/pythainlp/util/trie.py index c9900c89d..d4536b286 100644 --- a/pythainlp/util/trie.py +++ b/pythainlp/util/trie.py @@ -62,7 +62,7 @@ def remove(self, word: str) -> None: child.end = False # prune up the tree for parent, child, ch in reversed(threes): - if child.end or child.children: + if child.end or child.children: break del parent.children[ch] # remove from parent dict @@ -97,11 +97,11 @@ def __len__(self) -> int: def dict_trie(dict_source: Union[str, Iterable[str], Trie]) -> Trie: """ - Create a dictionary trie from a string or an iterable. + Create a dictionary trie from a file or an iterable. :param str|Iterable[str]|pythainlp.util.Trie dict_source: a path to dictionary file or a list of words or a pythainlp.util.Trie object - :return: a trie object created from a dictionary input + :return: a trie object :rtype: pythainlp.util.Trie """ trie = None From 5c9914bb12d1909420e5fcaebb787c23967f0c43 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Fri, 9 Oct 2020 10:27:21 +0100 Subject: [PATCH 4/5] Add test case for pythainlp.util.Trie.remove() --- tests/test_util.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_util.py b/tests/test_util.py index 6ab88674c..aca0db883 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -388,6 +388,15 @@ def test_trie(self): self.assertEqual(len(trie), 4) self.assertEqual(len(trie.prefixes("ทดสอบ")), 2) + trie.remove("ทบ") + trie.remove("ทด") + self.assertEqual(len(trie), 2) + + trie = Trie([]) + self.assertEqual(len(trie), 0) + trie.remove("หมด") + self.assertEqual(len(trie), 0) + self.assertIsNotNone(dict_trie(Trie(["ลอง", "ลาก"]))) self.assertIsNotNone(dict_trie(("ลอง", "สร้าง", "Trie", "ลน"))) self.assertIsNotNone(dict_trie(["ลอง", "สร้าง", "Trie", "ลน"])) From aaced0b2fa9143471479e3a5f3b97414fa16b044 Mon Sep 17 00:00:00 2001 From: Korakot Chaovavanich Date: Fri, 9 Oct 2020 16:52:14 +0700 Subject: [PATCH 5/5] Rename 'threes' to 'data' --- pythainlp/util/trie.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pythainlp/util/trie.py b/pythainlp/util/trie.py index d4536b286..a1648a929 100644 --- a/pythainlp/util/trie.py +++ b/pythainlp/util/trie.py @@ -53,15 +53,15 @@ def remove(self, word: str) -> None: self.words.remove(word) # then remove from nodes parent = self.root - threes = [] # track path to leaf + data = [] # track path to leaf for ch in word: child = parent.children[ch] - threes.append((parent, child, ch)) + data.append((parent, child, ch)) parent = child # remove the last one child.end = False # prune up the tree - for parent, child, ch in reversed(threes): + for parent, child, ch in reversed(data): if child.end or child.children: break del parent.children[ch] # remove from parent dict